Skip to content

Commit

Permalink
Change UInt64Index._na_value from 0 to np.nan (#18401)
Browse files Browse the repository at this point in the history
  • Loading branch information
jschendel authored and jreback committed Nov 24, 2017
1 parent e728f94 commit aaee541
Show file tree
Hide file tree
Showing 10 changed files with 79 additions and 130 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ Backwards incompatible API changes

- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`)
- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`)
- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`)
-


Expand Down Expand Up @@ -129,7 +130,7 @@ Bug Fixes
Conversion
^^^^^^^^^^

-
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
-
-

Expand Down
25 changes: 14 additions & 11 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
# then coerce to integer.
try:
return cls._try_convert_to_int_index(
data, copy, name)
data, copy, name, dtype)
except ValueError:
pass

Expand Down Expand Up @@ -307,7 +307,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
if inferred == 'integer':
try:
return cls._try_convert_to_int_index(
subarr, copy, name)
subarr, copy, name, dtype)
except ValueError:
pass

Expand Down Expand Up @@ -664,7 +664,7 @@ def ravel(self, order='C'):

# construction helpers
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
def _try_convert_to_int_index(cls, data, copy, name, dtype):
"""
Attempt to convert an array of data into an integer index.
Expand All @@ -685,15 +685,18 @@ def _try_convert_to_int_index(cls, data, copy, name):
"""

from .numeric import Int64Index, UInt64Index
try:
res = data.astype('i8', copy=False)
if (res == data).all():
return Int64Index(res, copy=copy, name=name)
except (OverflowError, TypeError, ValueError):
pass
if not is_unsigned_integer_dtype(dtype):
# skip int64 conversion attempt if uint-like dtype is passed, as
# this could return Int64Index when UInt64Index is what's desired
try:
res = data.astype('i8', copy=False)
if (res == data).all():
return Int64Index(res, copy=copy, name=name)
except (OverflowError, TypeError, ValueError):
pass

# Conversion to int64 failed (possibly due to
# overflow), so let's try now with uint64.
# Conversion to int64 failed (possibly due to overflow) or was skipped,
# so let's try now with uint64.
try:
res = data.astype('u8', copy=False)
if (res == data).all():
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,6 @@ class UInt64Index(NumericIndex):
_inner_indexer = libjoin.inner_join_indexer_uint64
_outer_indexer = libjoin.outer_join_indexer_uint64
_can_hold_na = False
_na_value = 0
_engine_type = libindex.UInt64Engine
_default_dtype = np.uint64

Expand Down
28 changes: 8 additions & 20 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@

from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index,
RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex,
TimedeltaIndex, PeriodIndex, IntervalIndex,
notna, isna)
TimedeltaIndex, PeriodIndex, IntervalIndex, isna)
from pandas.core.indexes.base import InvalidIndexError
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
from pandas.core.dtypes.common import needs_i8_conversion
Expand Down Expand Up @@ -529,31 +528,20 @@ def test_numpy_repeat(self):
tm.assert_raises_regex(ValueError, msg, np.repeat,
i, rep, axis=0)

def test_where(self):
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where(self, klass):
i = self.create_index()
result = i.where(notna(i))

cond = [True] * len(i)
result = i.where(klass(cond))
expected = i
tm.assert_index_equal(result, expected)

_nan = i._na_value
cond = [False] + [True] * len(i[1:])
expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype)

result = i.where(cond)
expected = pd.Index([i._na_value] + i[1:].tolist(), dtype=i.dtype)
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()

_nan = i._na_value
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, pd.Series]
expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype)

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_setops_errorcases(self):
for name, idx in compat.iteritems(self.indices):
# # non-iterable input
Expand Down
23 changes: 7 additions & 16 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,27 +61,18 @@ def test_pickle_round_trip(self):
result = tm.round_trip_pickle(idx)
tm.assert_index_equal(result, idx)

def test_where(self):
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where(self, klass):
i = self.create_index()
result = i.where(notna(i))
cond = [True] * len(i)
expected = i
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(),
freq='D')
result = i.where(notna(i2))
expected = i2
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, Series]
expected = pd.PeriodIndex([pd.NaT] + i[1:].tolist(), freq='D')

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)
expected = PeriodIndex([NaT] + i[1:].tolist(), freq='D')
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_where_other(self):

Expand Down
17 changes: 16 additions & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime, timedelta

import pandas.util.testing as tm
from pandas.core.dtypes.common import is_unsigned_integer_dtype
from pandas.core.indexes.api import Index, MultiIndex
from pandas.tests.indexes.common import Base

Expand All @@ -14,7 +15,7 @@
import numpy as np

from pandas import (period_range, date_range, Series,
DataFrame, Float64Index, Int64Index,
DataFrame, Float64Index, Int64Index, UInt64Index,
CategoricalIndex, DatetimeIndex, TimedeltaIndex,
PeriodIndex, isna)
from pandas.core.index import _get_combined_index, _ensure_index_from_sequences
Expand Down Expand Up @@ -201,6 +202,20 @@ def __array__(self, dtype=None):
result = pd.Index(ArrayLike(array))
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('dtype', [
    int, 'int64', 'int32', 'int16', 'int8', 'uint64', 'uint32',
    'uint16', 'uint8'])
def test_constructor_int_dtype_float(self, dtype):
    """Int-like floats passed with an integer ``dtype`` yield an int index.

    Unsigned dtypes are expected to produce a ``UInt64Index``; every other
    integer dtype is expected to produce an ``Int64Index``.
    """
    # GH 18400
    result = Index([0., 1., 2., 3.], dtype=dtype)

    # Choose the expected index class from the signedness of the dtype.
    unsigned = is_unsigned_integer_dtype(dtype)
    index_type = UInt64Index if unsigned else Int64Index

    expected = index_type([0, 1, 2, 3])
    tm.assert_index_equal(result, expected)

def test_constructor_int_dtype_nan(self):
# see gh-15187
data = [np.nan]
Expand Down
27 changes: 9 additions & 18 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import numpy as np

from pandas import Categorical, IntervalIndex, compat, notna
from pandas import Categorical, IntervalIndex, compat
from pandas.util.testing import assert_almost_equal
import pandas.core.config as cf
import pandas as pd
Expand Down Expand Up @@ -269,28 +269,19 @@ def f(x):
ordered=False)
tm.assert_index_equal(result, exp)

def test_where(self):
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
def test_where(self, klass):
i = self.create_index()
result = i.where(notna(i))
cond = [True] * len(i)
expected = i
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(),
categories=i.categories)
result = i.where(notna(i2))
expected = i2
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, pd.Series]
expected = pd.CategoricalIndex([np.nan] + i[1:].tolist(),
categories=i.categories)

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)
expected = CategoricalIndex([np.nan] + i[1:].tolist(),
categories=i.categories)
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_append(self):

Expand Down
19 changes: 9 additions & 10 deletions pandas/tests/indexes/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,20 +348,19 @@ def test_astype(self, closed):
expected = pd.Categorical(idx, ordered=True)
tm.assert_categorical_equal(result, expected)

def test_where(self, closed):
expected = self.create_index(closed=closed)
result = expected.where(expected.notna())
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
def test_where(self, closed, klass):
idx = self.create_index(closed=closed)
cond = [True] * len(idx)
expected = idx
result = expected.where(klass(cond))
tm.assert_index_equal(result, expected)

idx = IntervalIndex.from_breaks([1, 2], closed=closed)
result = idx.where([True, False])
expected = IntervalIndex.from_intervals(
[Interval(1.0, 2.0, closed=closed), np.nan])
cond = [False] + [True] * len(idx[1:])
expected = IntervalIndex([np.nan] + idx[1:].tolist())
result = idx.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
pass

def test_delete(self, closed):
expected = IntervalIndex.from_breaks([1, 2], closed=closed)
result = self.create_index(closed=closed).delete(0)
Expand Down
39 changes: 13 additions & 26 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from pandas import (date_range, notna, Series, Index, Float64Index,
from pandas import (date_range, Series, Index, Float64Index,
Int64Index, UInt64Index, RangeIndex)

import pandas.util.testing as tm
Expand Down Expand Up @@ -175,6 +175,18 @@ def test_modulo(self):
expected = Index(index.values % 2)
tm.assert_index_equal(index % 2, expected)

@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where(self, klass):
    """Check ``where`` with the condition wrapped in the array-like ``klass``.

    Two cases are verified:

    * an all-True condition returns an index equal to the original;
    * masking only the first element replaces it with ``_na_value``, with
      the expected result built as a ``Float64Index``.
    """
    i = self.create_index()

    cond = [True] * len(i)
    expected = i
    result = i.where(klass(cond))
    # Assert the all-True case before ``result``/``expected`` are rebound
    # below; otherwise this branch is computed but never checked.
    tm.assert_index_equal(result, expected)

    cond = [False] + [True] * (len(i) - 1)
    expected = Float64Index([i._na_value] + i[1:].tolist())
    result = i.where(klass(cond))
    tm.assert_index_equal(result, expected)


class TestFloat64Index(Numeric):
_holder = Float64Index
Expand Down Expand Up @@ -726,31 +738,6 @@ def test_coerce_list(self):
arr = Index([1, 2, 3, 4], dtype=object)
assert isinstance(arr, Index)

def test_where(self):
i = self.create_index()
result = i.where(notna(i))
expected = i
tm.assert_index_equal(result, expected)

_nan = i._na_value
cond = [False] + [True] * len(i[1:])
expected = pd.Index([_nan] + i[1:].tolist())

result = i.where(cond)
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()

_nan = i._na_value
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, pd.Series]
expected = pd.Index([_nan] + i[1:].tolist())

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_get_indexer(self):
target = Int64Index(np.arange(10))
indexer = self.index.get_indexer(target)
Expand Down
27 changes: 1 addition & 26 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import numpy as np

from pandas import (isna, notna, Series, Index, Float64Index,
from pandas import (isna, Series, Index, Float64Index,
Int64Index, RangeIndex)

import pandas.util.testing as tm
Expand Down Expand Up @@ -934,31 +934,6 @@ def test_len_specialised(self):
i = RangeIndex(0, 5, step)
assert len(i) == 0

def test_where(self):
i = self.create_index()
result = i.where(notna(i))
expected = i
tm.assert_index_equal(result, expected)

_nan = i._na_value
cond = [False] + [True] * len(i[1:])
expected = pd.Index([_nan] + i[1:].tolist())

result = i.where(cond)
tm.assert_index_equal(result, expected)

def test_where_array_like(self):
i = self.create_index()

_nan = i._na_value
cond = [False] + [True] * (len(i) - 1)
klasses = [list, tuple, np.array, pd.Series]
expected = pd.Index([_nan] + i[1:].tolist())

for klass in klasses:
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)

def test_append(self):
# GH16212
RI = RangeIndex
Expand Down

0 comments on commit aaee541

Please sign in to comment.