[SPARK-43606][PS] Remove Int64Index & Float64Index
itholic committed Aug 1, 2023
1 parent fd18c34 commit 9b93d57
Showing 13 changed files with 120 additions and 375 deletions.
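
For context: pandas 2.0 removed the specialized Int64Index and Float64Index classes in favor of the plain Index, and this commit brings pandas-on-Spark in line with that. The visible effect is in the repr of numeric indexes; a minimal before/after sketch, assuming a build that includes this change:

>>> import pyspark.pandas as ps
>>> ps.Index([1, 2, 3])   # before this commit: Int64Index([1, 2, 3], dtype='int64')
Index([1, 2, 3], dtype='int64')
>>> ps.Index([1.0, 2.0])  # before this commit: Float64Index([1.0, 2.0], dtype='float64')
Index([1.0, 2.0], dtype='float64')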
python/pyspark/pandas/__init__.py (3 changes: 0 additions & 3 deletions)
@@ -61,7 +61,6 @@
 from pyspark.pandas.indexes.category import CategoricalIndex
 from pyspark.pandas.indexes.datetimes import DatetimeIndex
 from pyspark.pandas.indexes.multi import MultiIndex
-from pyspark.pandas.indexes.numeric import Float64Index, Int64Index
 from pyspark.pandas.indexes.timedelta import TimedeltaIndex
 from pyspark.pandas.series import Series
 from pyspark.pandas.groupby import NamedAgg
@@ -77,8 +76,6 @@
     "Series",
     "Index",
     "MultiIndex",
-    "Int64Index",
-    "Float64Index",
     "CategoricalIndex",
     "DatetimeIndex",
     "TimedeltaIndex",
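
With the import gone and the two names dropped from __all__, Int64Index and Float64Index are no longer reachable from the top-level package. An illustrative sketch of what user code should now see (the exact error message is an assumption):

>>> import pyspark.pandas as ps
>>> ps.Int64Index([1, 2])  # doctest: +SKIP
Traceback (most recent call last):
    ...
AttributeError: module 'pyspark.pandas' has no attribute 'Int64Index'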
python/pyspark/pandas/base.py (28 changes: 14 additions & 14 deletions)
@@ -904,8 +904,8 @@ def astype(self: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
 1 2
 dtype: int64
->>> ser.rename("a").to_frame().set_index("a").index.astype('int64') # doctest: +SKIP
-Int64Index([1, 2], dtype='int64', name='a')
+>>> ser.rename("a").to_frame().set_index("a").index.astype('int64')
+Index([1, 2], dtype='int64', name='a')
 """
 return self._dtype_op.astype(self, dtype)
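
This pattern repeats through the rest of base.py: each doctest drops its # doctest: +SKIP marker (in place while the expected reprs did not match pandas 2.x) and pins the output to the plain Index repr. As a runnable counterpart to the astype example above, a sketch in which the Series setup is an assumption reconstructed from the doctest output:

>>> import pyspark.pandas as ps
>>> ser = ps.Series([1, 2])  # assumed setup, consistent with the doctest output above
>>> ser.rename("a").to_frame().set_index("a").index.astype('int64')
Index([1, 2], dtype='int64', name='a')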

@@ -1247,8 +1247,8 @@ def shift(
 4 23
 Name: Col2, dtype: int64
->>> df.index.shift(periods=3, fill_value=0) # doctest: +SKIP
-Int64Index([0, 0, 0, 0, 1], dtype='int64')
+>>> df.index.shift(periods=3, fill_value=0)
+Index([0, 0, 0, 0, 1], dtype='int64')
 """
 return self._shift(periods, fill_value).spark.analyzed

@@ -1341,8 +1341,8 @@ def value_counts(
 For Index
 >>> idx = ps.Index([3, 1, 2, 3, 4, np.nan])
->>> idx # doctest: +SKIP
-Float64Index([3.0, 1.0, 2.0, 3.0, 4.0, nan], dtype='float64')
+>>> idx
+Index([3.0, 1.0, 2.0, 3.0, 4.0, nan], dtype='float64')
 >>> idx.value_counts().sort_index()
 1.0 1
@@ -1511,8 +1511,8 @@ def nunique(self, dropna: bool = True, approx: bool = False, rsd: float = 0.05)
 3
 >>> idx = ps.Index([1, 1, 2, None])
->>> idx # doctest: +SKIP
-Float64Index([1.0, 1.0, 2.0, nan], dtype='float64')
+>>> idx
+Index([1.0, 1.0, 2.0, nan], dtype='float64')
 >>> idx.nunique()
 2
@@ -1586,11 +1586,11 @@ def take(self: IndexOpsLike, indices: Sequence[int]) -> IndexOpsLike:
 Index
 >>> psidx = ps.Index([100, 200, 300, 400, 500])
->>> psidx # doctest: +SKIP
-Int64Index([100, 200, 300, 400, 500], dtype='int64')
+>>> psidx
+Index([100, 200, 300, 400, 500], dtype='int64')
->>> psidx.take([0, 2, 4]).sort_values() # doctest: +SKIP
-Int64Index([100, 300, 500], dtype='int64')
+>>> psidx.take([0, 2, 4]).sort_values()
+Index([100, 300, 500], dtype='int64')
 MultiIndex
@@ -1684,8 +1684,8 @@ def factorize(
 >>> psidx = ps.Index(['b', None, 'a', 'c', 'b'])
 >>> codes, uniques = psidx.factorize()
->>> codes # doctest: +SKIP
-Int64Index([1, -1, 0, 2, 1], dtype='int64')
+>>> codes
+Index([1, -1, 0, 2, 1], dtype='int32')
 >>> uniques
 Index(['a', 'b', 'c'], dtype='object')
 """
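
One detail worth calling out from the value_counts and nunique hunks: an integer index containing None or np.nan is stored as float64, so the new expected reprs read Index([...], dtype='float64') where Float64Index used to appear. Note also the factorize hunk, where the codes dtype in the expected output changes from int64 to int32 along with the repr. A minimal sketch of the missing-value case, assuming this commit is applied:

>>> import pyspark.pandas as ps
>>> ps.Index([1, 1, 2, None])
Index([1.0, 1.0, 2.0, nan], dtype='float64')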
python/pyspark/pandas/frame.py (8 changes: 4 additions & 4 deletions)
@@ -734,8 +734,8 @@ def axes(self) -> List:
 --------
 >>> df = ps.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
->>> df.axes # doctest: +SKIP
-[Int64Index([0, 1], dtype='int64'), Index(['col1', 'col2'], dtype='object')]
+>>> df.axes
+[Index([0, 1], dtype='int64'), Index(['col1', 'col2'], dtype='object')]
 """
 return [self.index, self.columns]
@@ -8723,8 +8723,8 @@ def join(
 the original DataFrame’s index in the result unlike pandas.
 >>> join_psdf = psdf1.join(psdf2.set_index('key'), on='key')
->>> join_psdf.index # doctest: +SKIP
-Int64Index([0, 1, 2, 3], dtype='int64')
+>>> join_psdf.index
+Index([0, 1, 2, 3], dtype='int64')
 """
 if isinstance(right, ps.Series):
     common = list(self.columns.intersection([right.name]))
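
The join doctest above relies on setup lines that sit outside the hunk. A sketch that reconstructs them; the psdf1/psdf2 definitions are assumptions inferred from the surrounding docstring, not part of this diff:

>>> import pyspark.pandas as ps
>>> psdf1 = ps.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
...                       'A': ['A0', 'A1', 'A2', 'A3']})
>>> psdf2 = ps.DataFrame({'key': ['K0', 'K1', 'K2'],
...                       'B': ['B0', 'B1', 'B2']})
>>> join_psdf = psdf1.join(psdf2.set_index('key'), on='key')
>>> join_psdf.index  # the original index is kept, unlike pandas
Index([0, 1, 2, 3], dtype='int64')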
python/pyspark/pandas/indexes/__init__.py (1 change: 0 additions & 1 deletion)
@@ -17,5 +17,4 @@
 from pyspark.pandas.indexes.base import Index # noqa: F401
 from pyspark.pandas.indexes.datetimes import DatetimeIndex # noqa: F401
 from pyspark.pandas.indexes.multi import MultiIndex # noqa: F401
-from pyspark.pandas.indexes.numeric import Float64Index, Int64Index # noqa: F401
 from pyspark.pandas.indexes.timedelta import TimedeltaIndex # noqa: F401
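
Since the numeric module no longer provides these classes, importing them directly should fail as well. An illustrative sketch; whether this raises ImportError or ModuleNotFoundError depends on whether the module itself survives this commit, which is not visible here:

>>> from pyspark.pandas.indexes.numeric import Int64Index  # doctest: +SKIP
Traceback (most recent call last):
    ...
ImportError: cannot import name 'Int64Index' from 'pyspark.pandas.indexes.numeric'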
(9 more changed files not shown)
