From af45301cea8e4c005494b2b8eb6a946eea6987b4 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Fri, 26 Jul 2019 09:35:28 -0700 Subject: [PATCH] Expanded ASVs for to_json (#27595) --- asv_bench/benchmarks/io/json.py | 90 ++++++++++++++++++++++++++------- 1 file changed, 72 insertions(+), 18 deletions(-) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index 0ce42856fb14a..fc07f2a484102 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -63,10 +63,13 @@ def peakmem_read_json_lines_concat(self, index): class ToJSON(BaseIO): fname = "__test__.json" - params = ["split", "columns", "index"] - param_names = ["orient"] + params = [ + ["split", "columns", "index", "values", "records"], + ["df", "df_date_idx", "df_td_int_ts", "df_int_floats", "df_int_float_str"], + ] + param_names = ["orient", "frame"] - def setup(self, lines_orient): + def setup(self, orient, frame): N = 10 ** 5 ncols = 5 index = date_range("20000101", periods=N, freq="H") @@ -111,34 +114,85 @@ def setup(self, lines_orient): index=index, ) - def time_floats_with_int_index(self, orient): - self.df.to_json(self.fname, orient=orient) + def time_to_json(self, orient, frame): + getattr(self, frame).to_json(self.fname, orient=orient) - def time_floats_with_dt_index(self, orient): - self.df_date_idx.to_json(self.fname, orient=orient) + def mem_to_json(self, orient, frame): + getattr(self, frame).to_json(self.fname, orient=orient) + + def time_to_json_wide(self, orient, frame): + base_df = getattr(self, frame).copy() + df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) + df.to_json(self.fname, orient=orient) + + def mem_to_json_wide(self, orient, frame): + base_df = getattr(self, frame).copy() + df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) + df.to_json(self.fname, orient=orient) - def time_delta_int_tstamp(self, orient): - self.df_td_int_ts.to_json(self.fname, orient=orient) - def time_float_int(self, orient): - self.df_int_floats.to_json(self.fname, orient=orient) +class ToJSONLines(BaseIO): - def time_float_int_str(self, orient): - self.df_int_float_str.to_json(self.fname, orient=orient) + fname = "__test__.json" + + def setup(self): + N = 10 ** 5 + ncols = 5 + index = date_range("20000101", periods=N, freq="H") + timedeltas = timedelta_range(start=1, periods=N, freq="s") + datetimes = date_range(start=1, periods=N, freq="s") + ints = np.random.randint(100000000, size=N) + floats = np.random.randn(N) + strings = tm.makeStringIndex(N) + self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) + self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) + self.df_td_int_ts = DataFrame( + { + "td_1": timedeltas, + "td_2": timedeltas, + "int_1": ints, + "int_2": ints, + "ts_1": datetimes, + "ts_2": datetimes, + }, + index=index, + ) + self.df_int_floats = DataFrame( + { + "int_1": ints, + "int_2": ints, + "int_3": ints, + "float_1": floats, + "float_2": floats, + "float_3": floats, + }, + index=index, + ) + self.df_int_float_str = DataFrame( + { + "int_1": ints, + "int_2": ints, + "float_1": floats, + "float_2": floats, + "str_1": strings, + "str_2": strings, + }, + index=index, + ) - def time_floats_with_int_idex_lines(self, orient): + def time_floats_with_int_idex_lines(self): self.df.to_json(self.fname, orient="records", lines=True) - def time_floats_with_dt_index_lines(self, orient): + def time_floats_with_dt_index_lines(self): self.df_date_idx.to_json(self.fname, orient="records", lines=True) - def time_delta_int_tstamp_lines(self, orient): + def time_delta_int_tstamp_lines(self): self.df_td_int_ts.to_json(self.fname, orient="records", lines=True) - def time_float_int_lines(self, orient): + def time_float_int_lines(self): self.df_int_floats.to_json(self.fname, orient="records", lines=True) - def time_float_int_str_lines(self, orient): + def time_float_int_str_lines(self): self.df_int_float_str.to_json(self.fname, orient="records", lines=True)