Skip to content

Commit

Permalink
FEAT-#2363: introduce getter and setter for index name (#2368)
Browse files Browse the repository at this point in the history
Signed-off-by: ienkovich <ilya.enkovich@intel.com>
  • Loading branch information
ienkovich committed Nov 3, 2020
1 parent 5382769 commit 8c00a8f
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 3 deletions.
44 changes: 44 additions & 0 deletions modin/backends/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1584,6 +1584,50 @@ def has_multiindex(self, axis=0):
assert axis == 1
return isinstance(self.columns, pandas.MultiIndex)

def get_index_name(self):
    """
    Return the name attached to this object's row index.

    Returns
    -------
    hashable
        Value of ``self.index.name`` (per the original contract, None for
        a MultiIndex).
    """
    row_index = self.index
    return row_index.name

def set_index_name(self, name):
    """
    Assign a new name to this object's row index.

    Parameters
    ----------
    name : hashable
        Name to store in ``self.index.name``.
    """
    row_index = self.index
    row_index.name = name

def get_index_names(self):
    """
    Return the names of all levels of this object's row index.

    Returns
    -------
    list
        Value of ``self.index.names``.
    """
    row_index = self.index
    return row_index.names

def set_index_names(self, names):
    """
    Assign new names to the levels of this object's row index.

    Parameters
    ----------
    names : list
        Names to store in ``self.index.names``.
    """
    row_index = self.index
    row_index.names = names

# DateTime methods

dt_ceil = DateTimeDefault.register(pandas.Series.dt.ceil)
Expand Down
12 changes: 12 additions & 0 deletions modin/experimental/backends/omnisci/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,18 @@ def has_multiindex(self, axis=0):
assert axis == 1
return isinstance(self.columns, pandas.MultiIndex)

def get_index_name(self):
    """
    Return the index name reported by the underlying frame.

    Returns
    -------
    hashable
        Result of ``self._modin_frame.get_index_name()``.
    """
    frame = self._modin_frame
    return frame.get_index_name()

def set_index_name(self, name):
    """
    Rename the index by swapping in the frame returned by the underlying frame.

    Parameters
    ----------
    name : hashable
        New index name, forwarded to ``self._modin_frame.set_index_name``.
    """
    renamed_frame = self._modin_frame.set_index_name(name)
    self._modin_frame = renamed_frame

def get_index_names(self):
    """
    Return the index names reported by the underlying frame.

    Returns
    -------
    list
        Result of ``self._modin_frame.get_index_names()``.
    """
    frame = self._modin_frame
    return frame.get_index_names()

def set_index_names(self, names):
    """
    Rename index levels by swapping in the frame returned by the underlying frame.

    Parameters
    ----------
    names : list
        New index names, forwarded to ``self._modin_frame.set_index_names``.
    """
    renamed_frame = self._modin_frame.set_index_names(names)
    self._modin_frame = renamed_frame

def free(self):
    """Do nothing and return None; no work is performed here."""
    return None

Expand Down
63 changes: 63 additions & 0 deletions modin/experimental/engines/omnisci_on_ray/frame/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,69 @@ def has_multiindex(self):
return isinstance(self._index_cache, MultiIndex)
return self._index_cols is not None and len(self._index_cols) > 1

def get_index_name(self):
    """
    Return the name of the single index column, if there is exactly one.

    Returns
    -------
    hashable or None
        None when ``self._index_cols`` is None or holds more than one
        entry; otherwise its first entry, as stored.
    """
    index_cols = self._index_cols
    if index_cols is None or len(index_cols) > 1:
        return None
    return index_cols[0]

def set_index_name(self, name):
    """
    Build a frame whose single index column carries the name `name`.

    Parameters
    ----------
    name : hashable
        New index name.

    Returns
    -------
    frame
        ``self`` unchanged when the frame has a MultiIndex (a warning is
        issued instead) or when there are no index columns and `name` is
        None. Otherwise a new frame created through ``self.__constructor__``
        on top of a ``TransformNode`` over this frame.
    """
    if self.has_multiindex():
        ErrorMessage.single_warning("Scalar name for MultiIndex is not supported!")
        return self

    no_index_cols = self._index_cols is None
    if no_index_cols and name is None:
        return self

    names = self._mangle_index_names([name])

    # The index expression must be keyed by the same (mangled) name that is
    # passed as ``index_cols`` below, and must reference the column that
    # currently holds the index values. This mirrors ``set_index_names``,
    # which maps each new mangled name to a reference to the old column.
    # Without an index column the values come from "__rowid__"
    # (presumably the implicit row-id column -- confirm).
    source_col = "__rowid__" if no_index_cols else self._index_cols[0]
    exprs = OrderedDict()
    exprs[names[0]] = self.ref(source_col)

    # Data columns are passed through unchanged.
    for col in self.columns:
        exprs[col] = self.ref(col)

    return self.__constructor__(
        columns=self.columns,
        dtypes=self._dtypes_for_exprs(exprs),
        op=TransformNode(self, exprs),
        index_cols=names,
        uses_rowid=no_index_cols,
        force_execution_mode=self._force_execution_mode,
    )

def get_index_names(self):
    """
    Return the index names as a list.

    Returns
    -------
    list
        A copy of ``self._index_cols`` when the frame has a MultiIndex;
        otherwise a one-element list holding ``self.get_index_name()``.
    """
    if not self.has_multiindex():
        single_name = self.get_index_name()
        return [single_name]
    return self._index_cols.copy()

def set_index_names(self, names):
    """
    Build a frame whose MultiIndex columns are renamed to `names`.

    Parameters
    ----------
    names : list
        New index names; one per existing index column.

    Returns
    -------
    frame
        New frame created through ``self.__constructor__`` on top of a
        ``TransformNode`` over this frame.

    Raises
    ------
    ValueError
        If the frame has no MultiIndex, or if the number of names differs
        from the number of index columns.
    """
    if not self.has_multiindex():
        raise ValueError("Can set names for MultiIndex only")

    got = len(names)
    expected = len(self._index_cols)
    if got != expected:
        raise ValueError(f"Unexpected names count: expected {expected} got {got}")

    mangled = self._mangle_index_names(names)

    # Each new (mangled) index name references the old index column,
    # followed by the data columns passed through unchanged.
    exprs = OrderedDict(
        (new, self.ref(old)) for old, new in zip(self._index_cols, mangled)
    )
    exprs.update((col, self.ref(col)) for col in self.columns)

    return self.__constructor__(
        columns=self.columns,
        dtypes=self._dtypes_for_exprs(exprs),
        op=TransformNode(self, exprs),
        index_cols=mangled,
        force_execution_mode=self._force_execution_mode,
    )

def to_pandas(self):
self._execute()

Expand Down
24 changes: 24 additions & 0 deletions modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,30 @@ def applier(lib):

eval_general(pd, pandas, applier)

def test_set_index_name(self):
    """Check that renaming a flat index via the query compiler matches pandas."""
    new_name = "new_name"
    index = pandas.Index.__new__(pandas.Index, data=list(range(24)))

    # NOTE(review): statement order is kept as-is on purpose. pandas may
    # share the `index` object with the frame, in which case the rename
    # below is already visible when the Modin frame is built -- confirm.
    expected = pandas.DataFrame(self.data, index=index)
    expected.index.name = new_name
    actual = pd.DataFrame(self.data, index=index)
    actual._query_compiler.set_index_name(new_name)

    df_equals(expected, actual)

def test_set_index_names(self):
    """Check that renaming MultiIndex levels via the query compiler matches pandas."""
    level_tuples = [(i, j, k) for i in range(2) for j in range(3) for k in range(4)]
    index = pandas.MultiIndex.from_tuples(level_tuples)

    # NOTE(review): statement order is kept as-is on purpose. pandas may
    # share the `index` object with the frame, in which case the rename
    # below is already visible when the Modin frame is built -- confirm.
    expected = pandas.DataFrame(self.data, index=index)
    expected.index.names = ["new_name1", "new_name2", "new_name3"]
    actual = pd.DataFrame(self.data, index=index)
    actual._query_compiler.set_index_names(["new_name1", "new_name2", "new_name3"])

    df_equals(expected, actual)


class TestFillna:
data = {"a": [1, 1, None], "b": [None, None, 2], "c": [3, None, None]}
Expand Down
6 changes: 3 additions & 3 deletions modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -909,10 +909,10 @@ def _apply_agg_function(self, f, drop=True, *args, **kwargs):
drop=self._drop,
)
if self._idx_name is not None and self._as_index:
new_manager.index.name = self._idx_name
new_manager.set_index_name(self._idx_name)
result = type(self._df)(query_compiler=new_manager)
if result.index.name == "__reduced__":
result.index.name = None
if result._query_compiler.get_index_name() == "__reduced__":
result._query_compiler.set_index_name(None)
if self._kwargs.get("squeeze", False):
return result.squeeze()
return result
Expand Down

0 comments on commit 8c00a8f

Please sign in to comment.