Skip to content

Commit

Permalink
TEST-#2702: add loc/iloc benchmark (#2703)
Browse files Browse the repository at this point in the history
* TEST-#2702: add loc/iloc benchmark

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>

* TEST-#2702: add multiindex loc bench

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>

* TEST-#2702: add row_loc check

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev authored Feb 10, 2021
1 parent 6f1fe69 commit a54875a
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 1 deletion.
59 changes: 59 additions & 0 deletions asv_bench/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,65 @@ def time_value_counts(self, shape, bins):
execute(self.df.value_counts(bins=bins))


class TimeIndexing:
    """Benchmark row selection through ``.iloc`` and ``.loc``.

    Each benchmark is parametrized by the frame ``shape`` and by
    ``indexer_type``, the kind of object used as the row indexer.
    """

    param_names = ["shape", "indexer_type"]
    params = [
        UNARY_OP_DATA_SIZE[ASV_DATASET_SIZE],
        [
            "scalar",
            "bool",
            "slice",
            "list",
            "function",
        ],
    ]

    def setup(self, shape, indexer_type):
        """Create the frame and the indexer used by the timed methods.

        Parameters
        ----------
        shape : sequence of int
            Unpacked into ``generate_dataframe``; ``shape[0]`` is used here as
            the number of rows when sizing the indexer.
        indexer_type : str
            One of ``"scalar"``, ``"bool"``, ``"slice"``, ``"list"`` or
            ``"function"``.

        Raises
        ------
        ValueError
            If ``indexer_type`` is not one of the supported names.
        """
        self.df = generate_dataframe(ASV_USE_IMPL, "int", *shape, RAND_LOW, RAND_HIGH)
        n_rows = shape[0]
        if indexer_type == "bool":
            # One flag per row (False, True, False, ...). Building the mask
            # per row keeps its length equal to ``n_rows`` even when the row
            # count is odd, where ``[False, True] * (n_rows // 2)`` would come
            # up one element short.
            self.indexer = [bool(i % 2) for i in range(n_rows)]
        elif indexer_type == "scalar":
            self.indexer = n_rows // 2
        elif indexer_type == "slice":
            self.indexer = slice(0, n_rows, 2)
        elif indexer_type == "list":
            self.indexer = list(range(n_rows))
        elif indexer_type == "function":
            # Callable indexer: every second index label, walking backwards.
            # NOTE(review): ``time_iloc`` uses the returned labels as
            # positions, which presumably relies on the generated frame
            # having a default integer index - confirm.
            self.indexer = lambda df: df.index[::-2]
        else:
            # Fail here rather than with an AttributeError on ``self.indexer``
            # inside the timed methods.
            raise ValueError("Unknown indexer_type: {!r}".format(indexer_type))

    def time_iloc(self, shape, indexer_type):
        """Time ``df.iloc[indexer]``."""
        execute(self.df.iloc[self.indexer])

    def time_loc(self, shape, indexer_type):
        """Time ``df.loc[indexer]``."""
        execute(self.df.loc[self.indexer])


class TimeMultiIndexing:
    """Benchmark ``.loc`` slicing on a frame with two-level labels on both axes."""

    param_names = ["shape"]
    params = [UNARY_OP_DATA_SIZE[ASV_DATASET_SIZE]]

    def setup(self, shape):
        """Generate a frame and relabel rows and columns with MultiIndexes.

        Each MultiIndex is the product of the first half of the existing
        labels on that axis with a two-element list of strings. The frame is
        stored in ``self.df`` after sorting along ``axis=1``.
        """
        frame = generate_dataframe(ASV_USE_IMPL, "int", *shape, RAND_LOW, RAND_HIGH)

        half_rows = shape[0] // 2
        row_labels = pd.MultiIndex.from_product(
            [frame.index[:half_rows], ["bar", "foo"]]
        )
        half_cols = shape[1] // 2
        col_labels = pd.MultiIndex.from_product(
            [frame.columns[:half_cols], ["buz", "fuz"]]
        )

        frame.index = row_labels
        frame.columns = col_labels

        # NOTE(review): presumably sorted so that label slicing over the
        # column MultiIndex is valid - confirm.
        self.df = frame.sort_index(axis=1)

    def time_multiindex_loc(self, shape):
        """Time a ``.loc`` label slice from the third to the second-to-last label."""
        row_window = slice(self.df.index[2], self.df.index[-2])
        col_window = slice(self.df.columns[2], self.df.columns[-2])
        execute(self.df.loc[row_window, col_window])


class TimeAstype:
param_names = ["shape", "dtype", "astype_ncolumns"]
params = [
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ def __getitem__(self, key):
)
):
result.index = result.index.droplevel(list(range(len(col_loc))))
elif all(
elif not isinstance(row_loc, slice) and all(
not isinstance(row_loc[i], slice)
and row_loc[i] in result.index.levels[i]
for i in range(len(row_loc))
Expand Down

0 comments on commit a54875a

Please sign in to comment.