FIX-#2550: remove decorators usage for asv tested functions #2551

Merged
merged 1 commit into from
Dec 17, 2020
Merged
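The PR removes the `trigger_execution` decorator from the asv benchmark methods: previously every `time_*` method was wrapped so that the wrapper returned the result's `.shape` (forcing Modin's deferred computation to actually run); now each method calls a plain `execute(df)` helper itself. The PR description is not included here, but a plausible motivation is that asv introspects the `time_*` methods it collects, and a wrapping decorator replaces the function object asv sees with the wrapper. A minimal, hypothetical repro of that effect (illustrative only, not code from this PR):

import inspect


def trigger_execution(func):
    def real_executor(*arg, **kwargs):
        return func(*arg, **kwargs).shape

    return real_executor


@trigger_execution
def time_sum(self, data_size, axis):
    ...


print(inspect.signature(time_sum))  # (*arg, **kwargs): the original parameters are gone
print(time_sum.__name__)            # 'real_executor', not 'time_sum'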
71 changes: 26 additions & 45 deletions asv_bench/benchmarks/benchmarks.py
@@ -52,11 +52,9 @@
 ]
 
 
-def trigger_execution(func):
-    def real_executor(*arg, **kwargs):
-        return func(*arg, **kwargs).shape
-
-    return real_executor
+def execute(df):
+    "Make sure the calculations are done."
+    return df.shape
 
 
 class TimeMultiColumnGroupby:
@@ -69,13 +67,11 @@ def setup(self, data_size, count_columns):
         )
         self.groupby_columns = [col for col in self.df.columns[:count_columns]]
 
-    @trigger_execution
     def time_groupby_agg_quan(self, data_size, count_columns):
-        return self.df.groupby(by=self.groupby_columns).agg("quantile")
+        execute(self.df.groupby(by=self.groupby_columns).agg("quantile"))
 
-    @trigger_execution
     def time_groupby_agg_mean(self, data_size, count_columns):
-        return self.df.groupby(by=self.groupby_columns).apply(lambda df: df.mean())
+        execute(self.df.groupby(by=self.groupby_columns).apply(lambda df: df.mean()))
 
 
 class TimeGroupByDefaultAggregations:
@@ -90,21 +86,17 @@ def setup(self, data_size):
         )
         self.groupby_column = self.df.columns[0]
 
-    @trigger_execution
     def time_groupby_count(self, data_size):
-        return self.df.groupby(by=self.groupby_column).count()
+        execute(self.df.groupby(by=self.groupby_column).count())
 
-    @trigger_execution
     def time_groupby_size(self, data_size):
-        return self.df.groupby(by=self.groupby_column).size()
+        execute(self.df.groupby(by=self.groupby_column).size())
 
-    @trigger_execution
     def time_groupby_sum(self, data_size):
-        return self.df.groupby(by=self.groupby_column).sum()
+        execute(self.df.groupby(by=self.groupby_column).sum())
 
-    @trigger_execution
     def time_groupby_mean(self, data_size):
-        return self.df.groupby(by=self.groupby_column).mean()
+        execute(self.df.groupby(by=self.groupby_column).mean())
 
 
 class TimeJoin:
@@ -123,10 +115,11 @@ def setup(self, data_size, how, sort):
             ASV_USE_IMPL, "int", data_size[3], data_size[2], RAND_LOW, RAND_HIGH
         )
 
-    @trigger_execution
     def time_join(self, data_size, how, sort):
-        return self.df1.join(
-            self.df2, on=self.df1.columns[0], how=how, lsuffix="left_", sort=sort
+        execute(
+            self.df1.join(
+                self.df2, on=self.df1.columns[0], how=how, lsuffix="left_", sort=sort
+            )
         )
 
 
@@ -146,9 +139,8 @@ def setup(self, data_size, how, sort):
             ASV_USE_IMPL, "int", data_size[3], data_size[2], RAND_LOW, RAND_HIGH
         )
 
-    @trigger_execution
     def time_merge(self, data_size, how, sort):
-        return self.df1.merge(self.df2, on=self.df1.columns[0], how=how, sort=sort)
+        execute(self.df1.merge(self.df2, on=self.df1.columns[0], how=how, sort=sort))
 
 
 class TimeConcat:
@@ -168,12 +160,11 @@ def setup(self, data_size, how, axis):
             ASV_USE_IMPL, "int", data_size[3], data_size[2], RAND_LOW, RAND_HIGH
         )
 
-    @trigger_execution
     def time_concat(self, data_size, how, axis):
         if ASV_USE_IMPL == "modin":
-            return pd.concat([self.df1, self.df2], axis=axis, join=how)
+            execute(pd.concat([self.df1, self.df2], axis=axis, join=how))
         elif ASV_USE_IMPL == "pandas":
-            return pandas.concat([self.df1, self.df2], axis=axis, join=how)
+            execute(pandas.concat([self.df1, self.df2], axis=axis, join=how))
         else:
             raise NotImplementedError
 
@@ -196,9 +187,8 @@ def setup(self, data_size, binary_op, axis):
         )
         self.op = getattr(self.df1, binary_op)
 
-    @trigger_execution
     def time_binary_op(self, data_size, binary_op, axis):
-        return self.op(self.df2, axis=axis)
+        execute(self.op(self.df2, axis=axis))
 
 
 class BaseTimeSetItem:
@@ -243,15 +233,13 @@ class TimeSetItem(BaseTimeSetItem):
         [True, False],
     ]
 
-    @trigger_execution
     def time_setitem_qc(self, *args, **kwargs):
         self.df[self.loc] = self.item
-        return self.df
+        execute(self.df)
 
-    @trigger_execution
     def time_setitem_raw(self, *args, **kwargs):
         self.df[self.loc] = self.item_raw
-        return self.df
+        execute(self.df)
 
 
 class TimeInsert(BaseTimeSetItem):
@@ -262,15 +250,13 @@ class TimeInsert(BaseTimeSetItem):
         [True, False],
     ]
 
-    @trigger_execution
     def time_insert_qc(self, *args, **kwargs):
         self.df.insert(loc=self.iloc, column=random_string(), value=self.item)
-        return self.df
+        execute(self.df)
 
-    @trigger_execution
     def time_insert_raw(self, *args, **kwargs):
         self.df.insert(loc=self.iloc, column=random_string(), value=self.item_raw)
-        return self.df
+        execute(self.df)
 
 
 class TimeArithmetic:
@@ -285,22 +271,17 @@ def setup(self, data_size, axis):
             ASV_USE_IMPL, "int", data_size[1], data_size[0], RAND_LOW, RAND_HIGH
         )
 
-    @trigger_execution
     def time_sum(self, data_size, axis):
-        return self.df.sum(axis=axis)
+        execute(self.df.sum(axis=axis))
 
-    @trigger_execution
     def time_median(self, data_size, axis):
-        return self.df.median(axis=axis)
+        execute(self.df.median(axis=axis))
 
-    @trigger_execution
     def time_nunique(self, data_size, axis):
-        return self.df.nunique(axis=axis)
+        execute(self.df.nunique(axis=axis))
 
-    @trigger_execution
     def time_apply(self, data_size, axis):
-        return self.df.apply(lambda df: df.sum(), axis=axis)
+        execute(self.df.apply(lambda df: df.sum(), axis=axis))
 
-    @trigger_execution
     def time_mean(self, data_size, axis):
-        return self.df.mean(axis=axis)
+        execute(self.df.mean(axis=axis))
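Taken together, the new pattern is: build the frame in setup, call the operation inside the time_* method, and pass the result to execute so that a lazy backend materializes it before the timer stops. A self-contained sketch of that shape, using plain pandas in place of Modin (the class name, sizes, and random data here are illustrative, not taken from the benchmark file):

import numpy as np
import pandas as pd


def execute(df):
    "Make sure the calculations are done."
    # Touching .shape forces lazy/asynchronous backends to materialize the result;
    # with plain pandas it is just a cheap attribute access.
    return df.shape


class TimeSumSketch:
    # asv discovers methods whose names start with time_ and passes each
    # combination from `params` to them as arguments.
    params = [[0, 1]]
    param_names = ["axis"]

    def setup(self, axis):
        self.df = pd.DataFrame(np.random.randint(0, 100, size=(1000, 10)))

    def time_sum(self, axis):
        execute(self.df.sum(axis=axis))

If the asv configuration lives in asv_bench/ (as the changed file's path suggests), running something like `asv dev -b TimeArithmetic` from that directory should exercise the rewritten methods once without collecting full statistics; the `ASV_USE_IMPL` variable seen in `time_concat` above presumably switches between the modin and pandas implementations.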