Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixturize JSON tests #31191

Merged
merged 15 commits into from
Mar 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,11 @@ def tick_classes(request):
)


@pytest.fixture
def empty_series():
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
return pd.Series([], index=[], dtype=np.float64)


@pytest.fixture
def datetime_series():
"""
Expand All @@ -888,6 +893,26 @@ def datetime_series():
return s


@pytest.fixture
def string_series():
"""
Fixture for Series of floats with Index of unique strings
"""
s = tm.makeStringSeries()
s.name = "series"
return s


@pytest.fixture
def object_series():
"""
Fixture for Series of dtype object with Index of unique strings
"""
s = tm.makeObjectSeries()
s.name = "objects"
return s


@pytest.fixture
def float_frame():
"""
Expand Down
80 changes: 29 additions & 51 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,9 @@ def assert_json_roundtrip_equal(result, expected, orient):

@pytest.mark.filterwarnings("ignore:the 'numpy' keyword is deprecated:FutureWarning")
class TestPandasContainer:
@pytest.fixture(scope="function", autouse=True)
def setup(self, datapath):
self.dirpath = datapath("io", "json", "data")

self.ts = tm.makeTimeSeries()
self.ts.name = "ts"

self.series = tm.makeStringSeries()
self.series.name = "series"

self.objSeries = tm.makeObjectSeries()
self.objSeries.name = "objects"

self.empty_series = Series([], index=[], dtype=np.float64)
@pytest.fixture(autouse=True)
def setup(self):
self.empty_frame = DataFrame()

self.frame = _frame.copy()
self.frame2 = _frame2.copy()
self.intframe = _intframe.copy()
Expand All @@ -67,15 +54,6 @@ def setup(self, datapath):

yield

del self.dirpath

del self.ts

del self.series

del self.objSeries

del self.empty_series
del self.empty_frame

del self.frame
Expand Down Expand Up @@ -457,7 +435,7 @@ def test_frame_mixedtype_orient(self): # GH10289
left = read_json(inp, orient="values", convert_axes=False)
tm.assert_frame_equal(left, right)

def test_v12_compat(self):
def test_v12_compat(self, datapath):
df = DataFrame(
[
[1.56808523, 0.65727391, 1.81021139, -0.17251653],
Expand All @@ -474,12 +452,13 @@ def test_v12_compat(self):
df["modified"] = df["date"]
df.iloc[1, df.columns.get_loc("modified")] = pd.NaT

v12_json = os.path.join(self.dirpath, "tsframe_v012.json")
dirpath = datapath("io", "json", "data")
v12_json = os.path.join(dirpath, "tsframe_v012.json")
df_unser = pd.read_json(v12_json)
tm.assert_frame_equal(df, df_unser)

df_iso = df.drop(["modified"], axis=1)
v12_iso_json = os.path.join(self.dirpath, "tsframe_iso_v012.json")
v12_iso_json = os.path.join(dirpath, "tsframe_iso_v012.json")
df_unser_iso = pd.read_json(v12_iso_json)
tm.assert_frame_equal(df_iso, df_unser_iso)

Expand Down Expand Up @@ -633,15 +612,15 @@ def test_series_non_unique_index(self):
unser = read_json(s.to_json(orient="records"), orient="records", typ="series")
tm.assert_numpy_array_equal(s.values, unser.values)

def test_series_default_orient(self):
assert self.series.to_json() == self.series.to_json(orient="index")
def test_series_default_orient(self, string_series):
assert string_series.to_json() == string_series.to_json(orient="index")

@pytest.mark.parametrize("numpy", [True, False])
def test_series_roundtrip_simple(self, orient, numpy):
data = self.series.to_json(orient=orient)
def test_series_roundtrip_simple(self, orient, numpy, string_series):
data = string_series.to_json(orient=orient)
result = pd.read_json(data, typ="series", orient=orient, numpy=numpy)
expected = self.series.copy()

expected = string_series
if orient in ("values", "records"):
expected = expected.reset_index(drop=True)
if orient != "split":
Expand All @@ -651,13 +630,13 @@ def test_series_roundtrip_simple(self, orient, numpy):

@pytest.mark.parametrize("dtype", [False, None])
@pytest.mark.parametrize("numpy", [True, False])
def test_series_roundtrip_object(self, orient, numpy, dtype):
data = self.objSeries.to_json(orient=orient)
def test_series_roundtrip_object(self, orient, numpy, dtype, object_series):
data = object_series.to_json(orient=orient)
result = pd.read_json(
data, typ="series", orient=orient, numpy=numpy, dtype=dtype
)
expected = self.objSeries.copy()

expected = object_series
if orient in ("values", "records"):
expected = expected.reset_index(drop=True)
if orient != "split":
Expand All @@ -666,12 +645,11 @@ def test_series_roundtrip_object(self, orient, numpy, dtype):
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("numpy", [True, False])
def test_series_roundtrip_empty(self, orient, numpy):
data = self.empty_series.to_json(orient=orient)
def test_series_roundtrip_empty(self, orient, numpy, empty_series):
data = empty_series.to_json(orient=orient)
result = pd.read_json(data, typ="series", orient=orient, numpy=numpy)
expected = self.empty_series.copy()

# TODO: see what causes inconsistency
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did this get resolved?

expected = empty_series
if orient in ("values", "records"):
expected = expected.reset_index(drop=True)
else:
Expand All @@ -680,11 +658,11 @@ def test_series_roundtrip_empty(self, orient, numpy):
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("numpy", [True, False])
def test_series_roundtrip_timeseries(self, orient, numpy):
data = self.ts.to_json(orient=orient)
def test_series_roundtrip_timeseries(self, orient, numpy, datetime_series):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks really similar to test_series_roundtrip_empty and test_series_roundtrip_object. Would it make sense to use something like an any_series fixture for this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think that's a great idea. I’m thinking it's worth doing after breaking out and setting up test_roundtrip.py with the other parametrization, unless it's a blocker here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good. This LGTM

data = datetime_series.to_json(orient=orient)
result = pd.read_json(data, typ="series", orient=orient, numpy=numpy)
expected = self.ts.copy()

expected = datetime_series
if orient in ("values", "records"):
expected = expected.reset_index(drop=True)
if orient != "split":
Expand Down Expand Up @@ -772,20 +750,20 @@ def test_path(self):
df.to_json(path)
read_json(path)

def test_axis_dates(self):
def test_axis_dates(self, datetime_series):

# frame
json = self.tsframe.to_json()
result = read_json(json)
tm.assert_frame_equal(result, self.tsframe)

# series
json = self.ts.to_json()
json = datetime_series.to_json()
result = read_json(json, typ="series")
tm.assert_series_equal(result, self.ts, check_names=False)
tm.assert_series_equal(result, datetime_series, check_names=False)
assert result.name is None

def test_convert_dates(self):
def test_convert_dates(self, datetime_series):

# frame
df = self.tsframe.copy()
Expand All @@ -805,7 +783,7 @@ def test_convert_dates(self):
tm.assert_frame_equal(result, expected)

# series
ts = Series(Timestamp("20130101"), index=self.ts.index)
ts = Series(Timestamp("20130101"), index=datetime_series.index)
json = ts.to_json()
result = read_json(json, typ="series")
tm.assert_series_equal(result, ts)
Expand Down Expand Up @@ -900,8 +878,8 @@ def test_date_format_frame_raises(self):
("20130101 20:43:42.123456789", "ns"),
],
)
def test_date_format_series(self, date, date_unit):
ts = Series(Timestamp(date), index=self.ts.index)
def test_date_format_series(self, date, date_unit, datetime_series):
ts = Series(Timestamp(date), index=datetime_series.index)
ts.iloc[1] = pd.NaT
ts.iloc[5] = pd.NaT
if date_unit:
Expand All @@ -914,8 +892,8 @@ def test_date_format_series(self, date, date_unit):
expected = expected.dt.tz_localize("UTC")
tm.assert_series_equal(result, expected)

def test_date_format_series_raises(self):
ts = Series(Timestamp("20130101 20:43:42.123"), index=self.ts.index)
def test_date_format_series_raises(self, datetime_series):
ts = Series(Timestamp("20130101 20:43:42.123"), index=datetime_series.index)
msg = "Invalid value 'foo' for option 'date_unit'"
with pytest.raises(ValueError, match=msg):
ts.to_json(date_format="iso", date_unit="foo")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def series(index, _series_name, _static_values):


@pytest.fixture
def empty_series(series):
def empty_series_dti(series):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change this? Doesn't the series type depend on the _index_factory fixture?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to avoid naming conflicts with the top level empty_series fixture, which I think is more general; would they not conflict if named the same?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #24886: @jbrockmendel removed the empty series fixture since it is a singleton, and inlined it instead. Adding a top-level empty_series fixture is inconsistent with the series tests.

The fixture here is composable and more generic, and IMO it should not be renamed.

Does the fixture added in this PR need to be in the top level conftest or could it be just added to pandas/tests/io/json/test_pandas.py as a module level fixture?

"""
Fixture for parametrization of empty Series with date_range,
period_range and timedelta_range indexes
Expand Down
20 changes: 10 additions & 10 deletions pandas/tests/resample/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,13 @@ def test_raises_on_non_datetimelike_index():

@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_series(freq, empty_series, resample_method):
def test_resample_empty_series(freq, empty_series_dti, resample_method):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC the all_ts decorator is running this test for date_range, period_range and timedelta_range, not just dti

# GH12771 & GH12868

if resample_method == "ohlc":
pytest.skip("need to test for ohlc from GH13083")

s = empty_series
s = empty_series_dti
result = getattr(s.resample(freq), resample_method)()

expected = s.copy()
Expand All @@ -114,13 +114,13 @@ def test_resample_empty_series(freq, empty_series, resample_method):
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, empty_series, resample_method):
def test_resample_count_empty_series(freq, empty_series_dti, resample_method):
# GH28427
result = getattr(empty_series.resample(freq), resample_method)()
result = getattr(empty_series_dti.resample(freq), resample_method)()

index = _asfreq_compat(empty_series.index, freq)
index = _asfreq_compat(empty_series_dti.index, freq)

expected = pd.Series([], dtype="int64", index=index, name=empty_series.name)
expected = pd.Series([], dtype="int64", index=index, name=empty_series_dti.name)

tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -188,9 +188,9 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
# Empty series were sometimes causing a segfault (for the functions
# with Cython bounds-checking disabled) or an IndexError. We just run
# them to ensure they no longer do. (GH #10228)
empty_series = Series([], index, dtype)
empty_series_dti = Series([], index, dtype)
try:
getattr(empty_series.resample("d"), resample_method)()
getattr(empty_series_dti.resample("d"), resample_method)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
Expand Down Expand Up @@ -227,9 +227,9 @@ def test_resample_loffset_arg_type(frame, create_index, arg):


@all_ts
def test_apply_to_empty_series(empty_series):
def test_apply_to_empty_series(empty_series_dti):
# GH 14313
s = empty_series
s = empty_series_dti
for freq in ["M", "D", "H"]:
result = s.resample(freq).apply(lambda x: 1)
expected = s.resample(freq).apply(np.sum)
Expand Down