Skip to content

Commit

Permalink
FEAT-modin-project#2520: add groupby microbenchmarks
Browse files (browse the repository at this point in the history)
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Dec 16, 2020
1 parent 7d64e7c commit 5a8ae47
Showing 1 changed file with 23 additions and 8 deletions.
31 changes: 23 additions & 8 deletions asv_bench/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,9 @@
import modin.pandas as pd
import numpy as np

from modin.config import CpuCount, TestDatasetSize
from modin.config import TestDatasetSize
from .utils import generate_dataframe, RAND_LOW, RAND_HIGH, random_string

# define `MODIN_CPUS` env var to control the number of partitions
# it should be defined before modin.pandas import

ASV_USE_IMPL = "modin"

if TestDatasetSize.get() == "Big":
Expand Down Expand Up @@ -50,6 +47,26 @@
]


class TimeMultiColumnGroupby:
    """Benchmarks for groupby operations keyed on several columns at once.

    Parameterized over ``data_size`` and ``count_columns``; the latter is the
    number of leading dataframe columns used as groupby keys.
    """

    param_names = ["data_size", "count_columns"]
    params = [UNARY_OP_DATA_SIZE, [6]]

    def setup(self, data_size, count_columns):
        """Create the dataframe and select the first ``count_columns`` columns as keys."""
        # Note the argument order: ``data_size[1]`` is passed before ``data_size[0]``.
        self.df = generate_dataframe(
            ASV_USE_IMPL,
            "int",
            data_size[1],
            data_size[0],
            RAND_LOW,
            RAND_HIGH,
        )
        self.groupby_columns = list(self.df.columns[:count_columns])

    def time_groupby_agg_quan(self, data_size, count_columns):
        """Time ``agg("quantile")`` on the multi-column groupby."""
        grouped = self.df.groupby(by=self.groupby_columns)
        grouped.agg("quantile")

    def time_groupby_agg_mean(self, data_size, count_columns):
        """Time a per-group ``mean`` evaluated through ``groupby.apply``."""
        grouped = self.df.groupby(by=self.groupby_columns)
        grouped.apply(lambda group: group.mean())


class TimeGroupByDefaultAggregations:
param_names = ["data_size"]
params = [
Expand Down Expand Up @@ -199,7 +216,6 @@ def setup(self, data_size, item_length, loc, is_equal_indices):

class TimeSetItem(BaseTimeSetItem):
params = [
["int"],
UNARY_OP_DATA_SIZE,
[1],
["zero", "middle", "last"],
Expand All @@ -217,7 +233,6 @@ def time_setitem_raw(self, *args, **kwargs):

class TimeInsert(BaseTimeSetItem):
params = [
["int"],
UNARY_OP_DATA_SIZE,
[1],
["zero", "middle", "last"],
Expand Down Expand Up @@ -256,6 +271,6 @@ def time_nunique(self, data_size, axis):

def time_apply(self, data_size, axis):
    """Time ``DataFrame.apply`` with a summing callable along ``axis``."""
    frame = self.df
    frame.apply(lambda part: part.sum(), axis=axis)
def time_mean(self, impl, data_type, data_size, axis):

def time_mean(self, data_size, axis):
    """Time ``DataFrame.mean`` along ``axis``."""
    frame = self.df
    frame.mean(axis=axis)

0 comments on commit 5a8ae47

Please sign in to comment.