Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed block context from objToJSON #27166

Closed
wants to merge 38 commits into from
Closed
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
9cfdefd
Removed block context from objToJSON
WillAyd Jul 1, 2019
6f90e45
Moved some block stuff around
WillAyd Jul 3, 2019
0a01ada
Added frame context and stubbed iteration
WillAyd Jul 3, 2019
cb820d1
checkpoint
WillAyd Jul 10, 2019
061b0c6
Merge remote-tracking branch 'upstream/master' into json-remove-blocks
WillAyd Jul 15, 2019
fae5c56
Compiled though broken :-X
WillAyd Jul 15, 2019
0ab46a4
working serialization
WillAyd Jul 15, 2019
4eabc63
Working basic example
WillAyd Jul 15, 2019
c4a6661
Fixed not sizing output buffer
WillAyd Jul 16, 2019
787c3bd
some docstrings
WillAyd Jul 16, 2019
3a6a13b
Set appropriate type context
WillAyd Jul 17, 2019
615c104
Working VALUES iteration
WillAyd Jul 17, 2019
1af960d
Working for all formats, save state mutation
WillAyd Jul 17, 2019
7eb25f3
Fixed segfaults and infinite loops with SPLIT
WillAyd Jul 17, 2019
92cc532
Fixed cStr handling
WillAyd Jul 17, 2019
296b494
more cleanups of c string conversions
WillAyd Jul 17, 2019
f67ecf9
Removed print statements
WillAyd Jul 17, 2019
13ca6c1
Merge remote-tracking branch 'upstream/master' into json-remove-blocks
WillAyd Jul 17, 2019
466380c
Removed breakpoint
WillAyd Jul 17, 2019
1b2b85b
Merge remote-tracking branch 'upstream/master' into json-remove-blocks
WillAyd Jul 17, 2019
9c7b2e1
Fixed issue with CategoricalIndex
WillAyd Jul 17, 2019
a6da784
Removed prints
WillAyd Jul 17, 2019
f6e8308
Merge remote-tracking branch 'upstream/master' into json-remove-blocks
WillAyd Jul 19, 2019
f6726ab
Going route of changing NpyArr_encodeLabels
WillAyd Jul 19, 2019
48d5a26
Hack to get labels working
WillAyd Jul 19, 2019
96476dd
comments
WillAyd Jul 19, 2019
655d96e
Working implementation for almost all cases
WillAyd Jul 19, 2019
2c29a6f
Removed errant whitespace
WillAyd Jul 19, 2019
fb50c43
Removed unused doc param
WillAyd Jul 19, 2019
a742b72
Removed PdFrameContext
WillAyd Jul 19, 2019
8003087
Removed unused transpose struct member
WillAyd Jul 19, 2019
8218515
Remove more block code
WillAyd Jul 19, 2019
04cb508
Fixed issue with label encoding
WillAyd Jul 19, 2019
a92c96b
Whitespace cleanup
WillAyd Jul 19, 2019
f60a139
Wide frame benchmark
WillAyd Jul 19, 2019
1de936f
Fixed new benchmark
WillAyd Jul 19, 2019
8b226a6
More comprehensive benchmarks
WillAyd Jul 20, 2019
82739be
Passed VALUES directly through to numpy
WillAyd Jul 20, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 63 additions & 8 deletions asv_bench/benchmarks/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ def peakmem_read_json_lines_concat(self, index):
class ToJSON(BaseIO):

fname = "__test__.json"
params = ["split", "columns", "index"]
params = ["split", "columns", "index", "records", "values"]
param_names = ["orient"]

def setup(self, lines_orient):
def setup(self, orient):
N = 10 ** 5
ncols = 5
index = date_range("20000101", periods=N, freq="H")
Expand Down Expand Up @@ -126,29 +126,84 @@ def time_float_int(self, orient):
def time_float_int_str(self, orient):
self.df_int_float_str.to_json(self.fname, orient=orient)

def time_floats_with_int_idex_lines(self, orient):

class ToJSONLines(BaseIO):

fname = "__test__.json"

def setup(self):
N = 10 ** 5
ncols = 5
index = date_range("20000101", periods=N, freq="H")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use params here instead? You can leave setup like this, but the methods would then receive the param name as an argument.

timedeltas = timedelta_range(start=1, periods=N, freq="s")
datetimes = date_range(start=1, periods=N, freq="s")
ints = np.random.randint(100000000, size=N)
floats = np.random.randn(N)
strings = tm.makeStringIndex(N)
self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index)
self.df_td_int_ts = DataFrame(
{
"td_1": timedeltas,
"td_2": timedeltas,
"int_1": ints,
"int_2": ints,
"ts_1": datetimes,
"ts_2": datetimes,
},
index=index,
)
self.df_int_floats = DataFrame(
{
"int_1": ints,
"int_2": ints,
"int_3": ints,
"float_1": floats,
"float_2": floats,
"float_3": floats,
},
index=index,
)
self.df_int_float_str = DataFrame(
{
"int_1": ints,
"int_2": ints,
"float_1": floats,
"float_2": floats,
"str_1": strings,
"str_2": strings,
},
index=index,
)

def time_floats_with_int_index_lines(self):
self.df.to_json(self.fname, orient="records", lines=True)

def time_floats_with_dt_index_lines(self, orient):
def time_floats_with_dt_index_lines(self):
self.df_date_idx.to_json(self.fname, orient="records", lines=True)

def time_delta_int_tstamp_lines(self, orient):
def time_delta_int_tstamp_lines(self):
self.df_td_int_ts.to_json(self.fname, orient="records", lines=True)

def time_float_int_lines(self, orient):
def time_float_int_lines(self):
self.df_int_floats.to_json(self.fname, orient="records", lines=True)

def time_float_int_str_lines(self, orient):
def time_float_int_str_lines(self):
self.df_int_float_str.to_json(self.fname, orient="records", lines=True)


class ToJSONMem:
def setup_cache(self):
df = DataFrame([[1]])
frames = {"int": df, "float": df.astype(float)}
wide = DataFrame(np.ones((10_000, 10_000)))
frames = {"int": df, "float": df.astype(float), "wide": wide}

return frames

def mem_int(self, frames):
df = frames["wide"]
df.to_json()

def peakmem_int(self, frames):
df = frames["int"]
for _ in range(100_000):
Expand Down
Loading