[SPARK-43606][PS] Remove Int64Index & Float64Index
itholic committed Aug 1, 2023
1 parent fd18c34 commit 9b93d57
Showing 13 changed files with 120 additions and 375 deletions.
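
For context: pandas 2.0 removed the specialized Int64Index and Float64Index classes in favor of the plain Index, and this commit brings pandas-on-Spark in line with that. The visible effect is in the repr of numeric indexes; a minimal before/after sketch, assuming a build that includes this change:

>>> import pyspark.pandas as ps
>>> ps.Index([1, 2, 3])   # before this commit: Int64Index([1, 2, 3], dtype='int64')
Index([1, 2, 3], dtype='int64')
>>> ps.Index([1.0, 2.0])  # before this commit: Float64Index([1.0, 2.0], dtype='float64')
Index([1.0, 2.0], dtype='float64')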
python/pyspark/pandas/__init__.py (3 changes: 0 additions & 3 deletions)
@@ -61,7 +61,6 @@
 from pyspark.pandas.indexes.category import CategoricalIndex
 from pyspark.pandas.indexes.datetimes import DatetimeIndex
 from pyspark.pandas.indexes.multi import MultiIndex
-from pyspark.pandas.indexes.numeric import Float64Index, Int64Index
 from pyspark.pandas.indexes.timedelta import TimedeltaIndex
 from pyspark.pandas.series import Series
 from pyspark.pandas.groupby import NamedAgg
@@ -77,8 +76,6 @@
     "Series",
     "Index",
     "MultiIndex",
-    "Int64Index",
-    "Float64Index",
     "CategoricalIndex",
     "DatetimeIndex",
     "TimedeltaIndex",
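
With the import gone and the two names dropped from __all__, Int64Index and Float64Index are no longer reachable from the top-level package. An illustrative sketch of what user code should now see (the exact error message is an assumption):

>>> import pyspark.pandas as ps
>>> ps.Int64Index([1, 2])  # doctest: +SKIP
Traceback (most recent call last):
    ...
AttributeError: module 'pyspark.pandas' has no attribute 'Int64Index'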
python/pyspark/pandas/base.py (28 changes: 14 additions & 14 deletions)
@@ -904,8 +904,8 @@ def astype(self: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
 1 2
 dtype: int64
->>> ser.rename("a").to_frame().set_index("a").index.astype('int64') # doctest: +SKIP
-Int64Index([1, 2], dtype='int64', name='a')
+>>> ser.rename("a").to_frame().set_index("a").index.astype('int64')
+Index([1, 2], dtype='int64', name='a')
 """
 return self._dtype_op.astype(self, dtype)
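
This pattern repeats through the rest of base.py: each doctest drops its # doctest: +SKIP marker (in place while the expected reprs did not match pandas 2.x) and pins the output to the plain Index repr. As a runnable counterpart to the astype example above, a sketch in which the Series setup is an assumption reconstructed from the doctest output:

>>> import pyspark.pandas as ps
>>> ser = ps.Series([1, 2])  # assumed setup, consistent with the doctest output above
>>> ser.rename("a").to_frame().set_index("a").index.astype('int64')
Index([1, 2], dtype='int64', name='a')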

@@ -1247,8 +1247,8 @@ def shift(
 4 23
 Name: Col2, dtype: int64
->>> df.index.shift(periods=3, fill_value=0) # doctest: +SKIP
-Int64Index([0, 0, 0, 0, 1], dtype='int64')
+>>> df.index.shift(periods=3, fill_value=0)
+Index([0, 0, 0, 0, 1], dtype='int64')
 """
 return self._shift(periods, fill_value).spark.analyzed

@@ -1341,8 +1341,8 @@ def value_counts(
 For Index
 >>> idx = ps.Index([3, 1, 2, 3, 4, np.nan])
->>> idx # doctest: +SKIP
-Float64Index([3.0, 1.0, 2.0, 3.0, 4.0, nan], dtype='float64')
+>>> idx
+Index([3.0, 1.0, 2.0, 3.0, 4.0, nan], dtype='float64')
 >>> idx.value_counts().sort_index()
 1.0 1
@@ -1511,8 +1511,8 @@ def nunique(self, dropna: bool = True, approx: bool = False, rsd: float = 0.05)
 3
 >>> idx = ps.Index([1, 1, 2, None])
->>> idx # doctest: +SKIP
-Float64Index([1.0, 1.0, 2.0, nan], dtype='float64')
+>>> idx
+Index([1.0, 1.0, 2.0, nan], dtype='float64')
 >>> idx.nunique()
 2
@@ -1586,11 +1586,11 @@ def take(self: IndexOpsLike, indices: Sequence[int]) -> IndexOpsLike:
 Index
 >>> psidx = ps.Index([100, 200, 300, 400, 500])
->>> psidx # doctest: +SKIP
-Int64Index([100, 200, 300, 400, 500], dtype='int64')
+>>> psidx
+Index([100, 200, 300, 400, 500], dtype='int64')
->>> psidx.take([0, 2, 4]).sort_values() # doctest: +SKIP
-Int64Index([100, 300, 500], dtype='int64')
+>>> psidx.take([0, 2, 4]).sort_values()
+Index([100, 300, 500], dtype='int64')
 MultiIndex
@@ -1684,8 +1684,8 @@ def factorize(
 >>> psidx = ps.Index(['b', None, 'a', 'c', 'b'])
 >>> codes, uniques = psidx.factorize()
->>> codes # doctest: +SKIP
-Int64Index([1, -1, 0, 2, 1], dtype='int64')
+>>> codes
+Index([1, -1, 0, 2, 1], dtype='int32')
 >>> uniques
 Index(['a', 'b', 'c'], dtype='object')
 """
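
One detail worth calling out from the value_counts and nunique hunks: an integer index containing None or np.nan is stored as float64, so the new expected reprs read Index([...], dtype='float64') where Float64Index used to appear. Note also the factorize hunk, where the codes dtype in the expected output changes from int64 to int32 along with the repr. A minimal sketch of the missing-value case, assuming this commit is applied:

>>> import pyspark.pandas as ps
>>> ps.Index([1, 1, 2, None])
Index([1.0, 1.0, 2.0, nan], dtype='float64')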
python/pyspark/pandas/frame.py (8 changes: 4 additions & 4 deletions)
@@ -734,8 +734,8 @@ def axes(self) -> List:
 --------
 >>> df = ps.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
->>> df.axes # doctest: +SKIP
-[Int64Index([0, 1], dtype='int64'), Index(['col1', 'col2'], dtype='object')]
+>>> df.axes
+[Index([0, 1], dtype='int64'), Index(['col1', 'col2'], dtype='object')]
 """
 return [self.index, self.columns]
@@ -8723,8 +8723,8 @@ def join(
 the original DataFrame’s index in the result unlike pandas.
 >>> join_psdf = psdf1.join(psdf2.set_index('key'), on='key')
->>> join_psdf.index # doctest: +SKIP
-Int64Index([0, 1, 2, 3], dtype='int64')
+>>> join_psdf.index
+Index([0, 1, 2, 3], dtype='int64')
 """
 if isinstance(right, ps.Series):
     common = list(self.columns.intersection([right.name]))
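
The join doctest above relies on setup lines that sit outside the hunk. A sketch that reconstructs them; the psdf1/psdf2 definitions are assumptions inferred from the surrounding docstring, not part of this diff:

>>> import pyspark.pandas as ps
>>> psdf1 = ps.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
...                       'A': ['A0', 'A1', 'A2', 'A3']})
>>> psdf2 = ps.DataFrame({'key': ['K0', 'K1', 'K2'],
...                       'B': ['B0', 'B1', 'B2']})
>>> join_psdf = psdf1.join(psdf2.set_index('key'), on='key')
>>> join_psdf.index  # the original index is kept, unlike pandas
Index([0, 1, 2, 3], dtype='int64')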
python/pyspark/pandas/indexes/__init__.py (1 change: 0 additions & 1 deletion)
@@ -17,5 +17,4 @@
 from pyspark.pandas.indexes.base import Index # noqa: F401
 from pyspark.pandas.indexes.datetimes import DatetimeIndex # noqa: F401
 from pyspark.pandas.indexes.multi import MultiIndex # noqa: F401
-from pyspark.pandas.indexes.numeric import Float64Index, Int64Index # noqa: F401
 from pyspark.pandas.indexes.timedelta import TimedeltaIndex # noqa: F401
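
Since the numeric module no longer provides these classes, importing them directly should fail as well. An illustrative sketch; whether this raises ImportError or ModuleNotFoundError depends on whether the module itself survives this commit, which is not visible here:

>>> from pyspark.pandas.indexes.numeric import Int64Index  # doctest: +SKIP
Traceback (most recent call last):
    ...
ImportError: cannot import name 'Int64Index' from 'pyspark.pandas.indexes.numeric'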
(9 more changed files not shown)
