Skip to content

Commit

Permalink
TEST-#2686: add fillna benchmark (#2687)
Browse files Browse the repository at this point in the history
* TEST-#2686: add fillna benchmark

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>

* TEST-#2686: reply to review comments

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>

* TEST-#2686: add inplace parameter

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev authored Feb 5, 2021
1 parent 77d40ce commit 16fa188
Showing 1 changed file with 27 additions and 6 deletions.
33 changes: 27 additions & 6 deletions asv_bench/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
# Name of the dataframe implementation the benchmarks run against; read from
# the MODIN_ASV_USE_IMPL environment variable and defaulting to "modin".
ASV_USE_IMPL = os.environ.get("MODIN_ASV_USE_IMPL", "modin")
# Key used to pick an entry from the *_DATA_SIZE tables in this file; read
# from MODIN_TEST_DATASET_SIZE and defaulting to "Small".
ASV_DATASET_SIZE = os.environ.get("MODIN_TEST_DATASET_SIZE", "Small")

# Fail early, and say which value was rejected, rather than surfacing a
# confusing lookup error inside an individual benchmark later on.
assert ASV_USE_IMPL in ("modin", "pandas"), (
    f"MODIN_ASV_USE_IMPL must be 'modin' or 'pandas', got {ASV_USE_IMPL!r}"
)

BINARY_OP_DATA_SIZE = {
"Big": [
((5000, 5000), (5000, 5000)),
Expand Down Expand Up @@ -82,6 +84,11 @@
"Small": 5,
}

# Lookup table from an implementation name (the values ASV_USE_IMPL may take)
# to the module object that provides the dataframe API for it.
IMPL = dict(modin=pd, pandas=pandas)


def execute(df):
"Make sure the calculations are done."
Expand Down Expand Up @@ -217,12 +224,7 @@ def setup(self, shapes, how, axis):
)

def time_concat(self, shapes, how, axis):
    """Time concatenating ``self.df1`` and ``self.df2``.

    The ``concat`` function is taken from the module registered in ``IMPL``
    under ``ASV_USE_IMPL``, so the operation runs exactly once per call
    instead of once through an explicit per-implementation branch and once
    more through the lookup table.

    Parameters
    ----------
    shapes
        Benchmark parameter; not read here.
    how
        Forwarded to ``concat`` as ``join``.
    axis
        Forwarded to ``concat`` as ``axis``.
    """
    concat = IMPL[ASV_USE_IMPL].concat
    execute(concat([self.df1, self.df2], axis=axis, join=how))


class TimeBinaryOp:
Expand Down Expand Up @@ -359,3 +361,22 @@ def setup(self, shape, columns_number, ascending_list):

def time_sort_values(self, shape, columns_number, ascending_list):
    """Time ``sort_values`` on ``self.df`` by ``self.columns``.

    The sort direction comes from ``self.ascending``; the benchmark
    parameters themselves are not read in this method.
    """
    sorted_df = self.df.sort_values(self.columns, ascending=self.ascending)
    execute(sorted_df)


class TimeFillna:
    """Benchmark ``DataFrame.fillna`` on a frame filled entirely with NaN."""

    param_names = ["shape", "limit", "inplace"]
    params = [UNARY_OP_DATA_SIZE[ASV_DATASET_SIZE], [None, 0.8], [False, True]]

    def setup(self, shape, limit, inplace):
        """Create the all-NaN frame and convert ``limit`` to a row count.

        ``limit`` is either falsy (no limit) or a fraction of the number of
        rows; the resulting value is stored on ``self.limit``.
        """
        impl = IMPL[ASV_USE_IMPL]
        column_labels = [f"col{x}" for x in range(shape[1])]
        row_index = impl.RangeIndex(shape[0])
        self.df = impl.DataFrame(np.nan, index=row_index, columns=column_labels)
        if limit:
            self.limit = int(limit * shape[0])
        else:
            self.limit = None

    def time_fillna(self, shape, limit, inplace):
        """Time filling NaN with ``0.0``, either in place or into a new frame."""
        fill_args = dict(value=0.0, limit=self.limit, inplace=inplace)
        if not inplace:
            # Out-of-place: the returned frame is what has to be materialized.
            execute(self.df.fillna(**fill_args))
            return
        # In-place: fillna mutates self.df, so materialize self.df itself.
        self.df.fillna(**fill_args)
        execute(self.df)

0 comments on commit 16fa188

Please sign in to comment.