Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Raise TypeError when converting mixed-type DataFrames, use proper NA types, close #1282 #1283

Closed
wants to merge 6 commits into from
60 changes: 55 additions & 5 deletions pandas/rpy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,18 @@ def convert_robj(obj, use_pandas=True):
np.int32: robj.IntVector,
np.int64: robj.IntVector,
np.object_: robj.StrVector,
np.str: robj.StrVector}
np.str: robj.StrVector,
np.bool: robj.BoolVector}

NA_TYPES = {np.float64: robj.NA_Real,
np.float32: robj.NA_Real,
np.float: robj.NA_Real,
np.int: robj.NA_Integer,
np.int32: robj.NA_Integer,
np.int64: robj.NA_Integer,
np.object_: robj.NA_Character,
np.str: robj.NA_Character,
np.bool: robj.NA_Logical}

def convert_to_r_dataframe(df, strings_as_factors=False):
"""
Expand All @@ -207,8 +218,9 @@ def convert_to_r_dataframe(df, strings_as_factors=False):
for column in df:
value = df[column]
value_type = value.dtype.type
value = [item if pn.notnull(item) else robj.NA_Logical
value = [item if pn.notnull(item) else NA_TYPES[value_type]
for item in value]

value = VECTOR_TYPES[value_type](value)

if not strings_as_factors:
Expand Down Expand Up @@ -242,6 +254,11 @@ def convert_to_r_matrix(df, strings_as_factors=False):

"""

if df._is_mixed_type:
raise TypeError("Conversion to matrix only possible with non-mixed "
"type DataFrames")


r_dataframe = convert_to_r_dataframe(df, strings_as_factors)
as_matrix = robj.baseenv.get("as.matrix")
r_matrix = as_matrix(r_dataframe)
Expand Down Expand Up @@ -291,34 +308,67 @@ def test_convert_matrix():

def test_convert_r_dataframe():

is_na = robj.baseenv.get("is.na")

seriesd = _test.getSeriesData()
frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

#Null data
frame["E"] = [np.nan for item in frame["A"]]
# Some mixed type data
frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

r_dataframe = convert_to_r_dataframe(frame)

assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
assert all(is_na(item) for item in r_dataframe.rx2("E"))

for column in r_dataframe.colnames:
for column in frame[["A", "B", "C", "D"]]:
coldata = r_dataframe.rx2(column)
original_data = frame[column]
assert np.array_equal(convert_robj(coldata), original_data)

for column in frame[["D", "E"]]:
for original, converted in zip(frame[column],
r_dataframe.rx2(column)):

if pn.isnull(original):
assert is_na(converted)
else:
assert original == converted

def test_convert_r_matrix():

is_na = robj.baseenv.get("is.na")

seriesd = _test.getSeriesData()
frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
#Null data
frame["E"] = [np.nan for item in frame["A"]]

r_dataframe = convert_to_r_matrix(frame)

assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
assert all(is_na(item) for item in r_dataframe.rx(True, "E"))

for column in r_dataframe.colnames:
for column in frame[["A", "B", "C", "D"]]:
coldata = r_dataframe.rx(True, column)
original_data = frame[column]
assert np.array_equal(convert_robj(coldata), original_data)
assert np.array_equal(convert_robj(coldata),
original_data)

# Pandas bug 1282
frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

#FIXME: Ugly, this whole module needs to be ported to nose/unittest
try:
wrong_matrix = convert_to_r_matrix(frame)
except TypeError:
pass
except Exception:
raise


if __name__ == '__main__':
Expand Down