Skip to content

Commit

Permalink
BUG: ensure Series.str raises TypeError for inappropriate dtype
Browse files Browse the repository at this point in the history
Fixes GH9184

Also includes a fix for Series.apply to ensure that it propagates
metadata and dtypes properly for empty Series (this was necessary to
fix a Stata test)
  • Loading branch information
shoyer committed Jan 23, 2015
1 parent 9b5a5ea commit b7a6d1b
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 22 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ Bug Fixes
- Bug in groupby ``.nth()`` with a multiple column groupby (:issue:`8979`)
- Bug in ``DataFrame.where`` and ``Series.where`` coerce numerics to string incorrectly (:issue:`9280`)
- Bug in ``DataFrame.where`` and ``Series.where`` raise ``ValueError`` when string list-like is passed. (:issue:`9280`)
- Accessing ``Series.str`` methods on a ``Series`` with non-string values now raises ``TypeError`` instead of producing incorrect results (:issue:`9184`)

- Fixed division by zero error for ``Series.kurt()`` when all values are equal (:issue:`9197`)

Expand Down
9 changes: 8 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2045,7 +2045,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
y : Series or DataFrame if func returns a Series
"""
if len(self) == 0:
return Series()
return self._constructor(dtype=self.dtype,
index=self.index).__finalize__(self)

if kwds or args and not isinstance(func, np.ufunc):
f = lambda x: func(x, *args, **kwds)
Expand Down Expand Up @@ -2504,6 +2505,12 @@ def to_period(self, freq=None, copy=True):
# string methods

def _make_str_accessor(self):
    """Build the ``.str`` accessor object for this Series.

    Returns
    -------
    StringMethods
        A ``StringMethods`` instance wrapping this Series.

    Raises
    ------
    TypeError
        If the dtype of this Series is not object dtype.
    """
    # Ideally every Series holding any non-string value would be rejected
    # here, but inspecting the values is not practical for performance
    # reasons until a dedicated str dtype exists (GH 9343). Only the dtype
    # is checked.
    if com.is_object_dtype(self.dtype):
        return StringMethods(self)
    raise TypeError("Can only use .str accessor with string values, "
                    "which use np.object_ dtype in pandas")

str = base.AccessorProperty(StringMethods, _make_str_accessor)
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2522,6 +2522,8 @@ def test_cat_accessor_api(self):
self.assertIs(Series.cat, CategoricalAccessor)
s = Series(list('aabbcde')).astype('category')
self.assertIsInstance(s.cat, CategoricalAccessor)
with tm.assertRaisesRegexp(TypeError, "only use .cat accessor"):
Series([1]).cat

def test_pickle_v0_14_1(self):
cat = pd.Categorical(values=['a', 'b', 'c'],
Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,9 @@ def test_dt_accessor_api(self):
s = Series(date_range('2000-01-01', periods=3))
self.assertIsInstance(s.dt, DatetimeProperties)

with tm.assertRaisesRegexp(TypeError, "only use .dt accessor"):
Series([1]).dt

def test_binop_maybe_preserve_name(self):

# names match, preserve
Expand Down Expand Up @@ -5411,9 +5414,14 @@ def test_apply(self):
tm.assert_frame_equal(result, expected)

# empty series
s = Series()
s = Series(dtype=object, name='foo', index=pd.Index([], name='bar'))
rs = s.apply(lambda x: x)
tm.assert_series_equal(s, rs)
# check all metadata (GH 9322)
self.assertIsNot(s, rs)
self.assertIs(s.index, rs.index)
self.assertEqual(s.dtype, rs.dtype)
self.assertEqual(s.name, rs.name)

# index but no data
s = Series(index=[1, 2, 3])
Expand Down
24 changes: 4 additions & 20 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ def test_api(self):
self.assertIs(Series.str, strings.StringMethods)
self.assertIsInstance(Series(['']).str, strings.StringMethods)

# GH 9184
with tm.assertRaisesRegexp(TypeError, "only use .str accessor"):
Series([1]).str

def test_iter(self):
# GH3638
strs = 'google', 'wikimedia', 'wikipedia', 'wikitravel'
Expand Down Expand Up @@ -80,26 +84,6 @@ def test_iter_single_element(self):
self.assertFalse(i)
assert_series_equal(ds, s)

def test_iter_numeric_try_string(self):
# behavior identical to empty series
dsi = Series(lrange(4))

i, s = 100, 'h'

for i, s in enumerate(dsi.str):
pass

self.assertEqual(i, 100)
self.assertEqual(s, 'h')

dsf = Series(np.arange(4.))

for i, s in enumerate(dsf.str):
pass

self.assertEqual(i, 100)
self.assertEqual(s, 'h')

def test_iter_object_try_string(self):
ds = Series([slice(None, randint(10), randint(10, 20))
for _ in range(4)])
Expand Down

0 comments on commit b7a6d1b

Please sign in to comment.