Skip to content

Commit

Permalink
refactor: final refactoring for Forth generation (#749)
Browse files Browse the repository at this point in the history
* rename GenHelper to ForthLevelStash and add docstring with better explanation

* add error handling for Python class generation

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* change check procedure for Forth generation

* fix indentation in generated Python code

* add forth generation cancellation

* add forth generation cancellation to streamer generated classes

* Rename @aryan26roy comments to 'AwkwardForth testing'.

* make forth the default

* fix some tests

* fix more tests

* chore: remove references to Identifier (once known as Identities).

* Also remove 'uproot' parameters from Forms.

* Carefully merged #710 and #749.

* This PR requires Awkward 2.0.0rc2.

* No TVector2.

* Structured the generated code in a more readable way.

* change parameters

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix bug

* fix awkward form

* add streamer name

* fix type

* fix typo

* fix awkward form

* fix code generation bug

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* linting

* remove ._v2

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: jpivarski <jpivarski@gmail.com>
  • Loading branch information
3 people authored Nov 6, 2022
1 parent 2492876 commit ad31773
Show file tree
Hide file tree
Showing 29 changed files with 525 additions and 407 deletions.
27 changes: 19 additions & 8 deletions src/uproot/_awkward_forth.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ def replace_form_and_model(self, form, model):
return temp_node, temp_node_top, temp_form, temp_form_top, temp_prev_form

def get_code_recursive(self, node):
pre, post, init, header = self.tree_walk(node)
return pre, post, init, header

def tree_walk(self, node):
if "content" in node.keys():
if node["content"] is None:
return (
Expand All @@ -69,14 +73,14 @@ def get_code_recursive(self, node):
node["header_code"],
)
else:
pre, post, init, header = self.get_code_recursive(node["content"])
pre, post, init, header = self.tree_walk(node["content"])
pre2 = "".join(node["pre_code"])
pre2 = pre2 + pre
post2 = "".join(node["post_code"])
post2 = post2 + post
init = node["init_code"] + init
header = node["header_code"] + header
return pre2, post2, init, header
return pre2 + post2, "", init, header
elif self.var_set:
return "", "", "", ""

Expand Down Expand Up @@ -273,21 +277,28 @@ def add_to_init(self, code):
self.final_init.append(code)


class GenHelper:
def forth_stash(context):
"""
Returns a ForthLevelStash object if ForthGeneration is to be done, else None.
"""
if hasattr(context.get("forth"), "gen"):
return ForthLevelStash(context["forth"].gen)
else:
return None


class ForthLevelStash:
"""
Helper class to aid Forth code generation within one read/read_members function call.
Helper class to stash code at one level of Forth code generation. Keeps the code generation clean and maintains order for the code snippets.
"""

def __init__(self, context):
self.forth_present = False
self._pre_code = []
self._post_code = []
self._header = ""
self._init = ""
self._form_key = []
if hasattr(context.get("forth"), "gen"):
self.forth_present = True
self._gen_obj = context["forth"].gen
self._gen_obj = context

def is_forth(self):
return self.forth_present
Expand Down
244 changes: 121 additions & 123 deletions src/uproot/containers.py

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion src/uproot/deserialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ def c(name, version=None):

new_scope["c"] = c

_actually_compile(class_code, new_scope)
try:
_actually_compile(class_code, new_scope)
except SyntaxError as err:
raise SyntaxError(class_code + "\n\n" + str(err)) from err

out = new_scope[class_name]
out.class_code = class_code
Expand Down
50 changes: 27 additions & 23 deletions src/uproot/interpretation/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class AsObjects(uproot.interpretation.Interpretation):
def __init__(self, model, branch=None):
self._model = model
self._branch = branch
self._forth = False
self._forth = True
self._forth_lock = threading.Lock()
self._forth_form_keys = None
self._complete_forth_code = None
Expand Down Expand Up @@ -244,7 +244,7 @@ def basket_array_forth(

def _discover_forth(self, data, byte_offsets, branch, context, cursor_offset):
output = numpy.empty(len(byte_offsets) - 1, dtype=numpy.dtype(object))

context["cancel_forth"] = False
for i in range(len(byte_offsets) - 1):
byte_start = byte_offsets[i]
byte_stop = byte_offsets[i + 1]
Expand All @@ -253,7 +253,8 @@ def _discover_forth(self, data, byte_offsets, branch, context, cursor_offset):
cursor = uproot.source.cursor.Cursor(
0, origin=-(byte_start + cursor_offset)
)
context["forth"].gen.var_set = False
if "forth" in context.keys():
context["forth"].gen.var_set = False
output[i] = self._model.read(
chunk,
cursor,
Expand All @@ -262,26 +263,29 @@ def _discover_forth(self, data, byte_offsets, branch, context, cursor_offset):
branch.file.detached,
branch,
)
context["forth"].gen.awkward_model = context["forth"].gen.top_node
if not context["forth"].gen.var_set:
context["forth"].prereaddone = True
self._assemble_forth(
context["forth"].gen, context["forth"].gen.top_node["content"]
)
self._complete_forth_code = f"""input stream
input byteoffsets
input bytestops
{"".join(context["forth"].gen.final_header)}
{"".join(context["forth"].gen.final_init)}
0 do
byteoffsets I-> stack
stream seek
{"".join(context["forth"].gen.final_code)}
loop
"""
self._forth_form_keys = tuple(context["forth"].gen.form_keys)
self._form = context["forth"].gen.top_form
return None # we should re-read all the data with Forth
if context["cancel_forth"] and "forth" in context.keys():
del context["forth"]
if "forth" in context.keys():
context["forth"].gen.awkward_model = context["forth"].gen.top_node
if not context["forth"].gen.var_set:
context["forth"].prereaddone = True
self._assemble_forth(
context["forth"].gen, context["forth"].gen.top_node["content"]
)
self._complete_forth_code = f"""input stream
input byteoffsets
input bytestops
{"".join(context["forth"].gen.final_header)}
{"".join(context["forth"].gen.final_init)}
0 do
byteoffsets I-> stack
stream seek
{"".join(context["forth"].gen.final_code)}
loop
"""
self._forth_form_keys = tuple(context["forth"].gen.form_keys)
self._form = context["forth"].gen.top_form
return None # we should re-read all the data with Forth
return output # Forth-generation was unsuccessful: this is Python output

def _assemble_forth(self, forth_obj, awkward_model):
Expand Down
42 changes: 22 additions & 20 deletions src/uproot/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,16 +796,17 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None):
context["breadcrumbs"] = old_breadcrumbs + (self,)

self.hook_before_read(chunk=chunk, cursor=cursor, context=context, file=file)
helper_obj = uproot._awkward_forth.GenHelper(context)
if helper_obj.is_forth():
forth_stash = uproot._awkward_forth.forth_stash(context)
if forth_stash is not None:
forth_obj = context["forth"].gen

if context.get("reading", True):
temp_index = cursor._index
self.read_numbytes_version(chunk, cursor, context)
length = cursor._index - temp_index
if length != 0:
helper_obj.add_to_pre(f"{length} stream skip\n")
if forth_stash is not None:
forth_stash.add_to_pre(f"{length} stream skip\n")
if (
issubclass(cls, VersionedModel)
and self._instance_version != classname_version(cls.__name__)
Expand All @@ -815,10 +816,10 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None):
if classname_version(correct_cls.__name__) != classname_version(
cls.__name__
):
if helper_obj.is_forth():
if forth_stash is not None:
forth_obj.add_node(
"pass",
helper_obj.get_attrs(),
forth_stash.get_attrs(),
"i64",
1,
{},
Expand All @@ -834,30 +835,31 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None):
parent,
concrete=concrete,
)
# if helper_obj.is_forth():
# if forth_stash is not None:
# forth_obj.go_to(temp)
return temp_var

if context.get("in_TBranch", False):
# @aryan26roy: test_0637's 01,02,05,08,09,11,12,13,15,16,29,35,38,39,44,45,46,47,49,50,52,56
# AwkwardForth testing: test_0637's 01,02,05,08,09,11,12,13,15,16,29,35,38,39,44,45,46,47,49,50,52,56
if self._num_bytes is None and self._instance_version != self.class_version:
self._instance_version = None
cursor = self._cursor
if helper_obj.is_forth():
helper_obj._pre_code.pop(-1)
if forth_stash is not None and not context["cancel_forth"]:
forth_stash._pre_code.pop(-1)

elif self._instance_version == 0:
helper_obj.add_to_pre("4 stream skip\n")
if forth_stash is not None:
forth_stash.add_to_pre("4 stream skip\n")
cursor.skip(4)

if context.get("reading", True):
self.hook_before_read_members(
chunk=chunk, cursor=cursor, context=context, file=file
)
if helper_obj.is_forth():
if forth_stash is not None:
forth_obj.add_node(
"model828",
helper_obj.get_attrs(),
forth_stash.get_attrs(),
"i64",
1,
{},
Expand Down Expand Up @@ -1321,10 +1323,10 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None):
"""
import uproot.deserialization

helper_obj = uproot._awkward_forth.GenHelper(context)
forth_stash = uproot._awkward_forth.forth_stash(context)

if helper_obj.is_forth():
forth_obj = helper_obj.get_gen_obj()
if forth_stash is not None:
forth_obj = forth_stash.get_gen_obj()
# Ignores context["reading"], because otherwise, there would be nothing to do.
start_index = cursor._index
(
Expand All @@ -1335,12 +1337,12 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None):

versioned_cls = cls.class_of_version(version)
bytes_skipped = cursor._index - start_index
if helper_obj.is_forth():
if forth_stash is not None:
# raise NotImplementedError
helper_obj.add_to_pre(f"{bytes_skipped} stream skip \n")
forth_stash.add_to_pre(f"{bytes_skipped} stream skip \n")
forth_obj.add_node(
"Model1319",
helper_obj.get_attrs(),
forth_stash.get_attrs(),
"i64",
1,
{},
Expand All @@ -1366,7 +1368,7 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None):
)

# versioned_cls.read starts with numbytes_version again because move=False (above)
# if helper_obj.is_forth():
# if forth_stash is not None:
temp_var = cls.postprocess(
versioned_cls.read(
chunk, cursor, context, file, selffile, parent, concrete=concrete
Expand All @@ -1376,7 +1378,7 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None):
context,
file,
)
# if helper_obj.is_forth():
# if forth_stash is not None:
# if "no_go_to" not in context.keys():
# raise NotImplementedError
# forth_obj.go_to(temp_node)
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TArray.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def read_numbytes_version(self, chunk, cursor, context):
pass

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"""memberwise serialization of {}
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TAtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Model_TAttLine_v1(uproot.model.VersionedModel):
"""

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"""memberwise serialization of {}
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TBasket.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def read_numbytes_version(self, chunk, cursor, context):
pass

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
assert isinstance(self._parent, uproot.behaviors.TBranch.TBranch)
self._basket_num = context.get("basket_num")

Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TBranch.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class Model_TBranch_v10(uproot.behaviors.TBranch.TBranch, uproot.model.Versioned
behaviors = (uproot.behaviors.TBranch.TBranch,)

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"""memberwise serialization of {}
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TClonesArray.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Model_TClonesArray(uproot.model.VersionedModel, Sequence):
"""

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"memberwise serialization of {}\nin file {}".format(
Expand Down
12 changes: 6 additions & 6 deletions src/uproot/models/TDatime.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ def read_numbytes_version(self, chunk, cursor, context):
pass

def read_members(self, chunk, cursor, context, file):
helper_obj = uproot._awkward_forth.GenHelper(context)
if helper_obj.is_forth():
forth_obj = helper_obj.get_gen_obj()
forth_stash = uproot._awkward_forth.forth_stash(context)
if forth_stash is not None:
forth_obj = forth_stash.get_gen_obj()
key = forth_obj.get_keys(1)
form_key = f"node{key}-data"
helper_obj.add_to_header(f"output node{key}-data int32\n")
helper_obj.add_to_pre(f"stream !I-> node{key}-data\n")
forth_stash.add_to_header(f"output node{key}-data int32\n")
forth_stash.add_to_pre(f"stream !I-> node{key}-data\n")
form_key = f"node{key}-data"
if forth_obj.should_add_form():
forth_obj.add_form_key(form_key)
Expand All @@ -49,7 +49,7 @@ def read_members(self, chunk, cursor, context, file):
forth_obj.add_form(temp_aform)
temp_form = forth_obj.add_node(
f"node{key}",
helper_obj.get_attrs(),
forth_stash.get_attrs(),
"i64",
0,
None,
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class Model_TGraph_v4(uproot.behaviors.TGraph.TGraph, uproot.model.VersionedMode
behaviors = (uproot.behaviors.TGraph.TGraph,)

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"memberwise serialization of {}\nin file {}".format(
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TH.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ class Model_TAxis_v10(uproot.behaviors.TAxis.TAxis, uproot.model.VersionedModel)
"""

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"""memberwise serialization of {}
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/THashList.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def read_numbytes_version(self, chunk, cursor, context):
pass

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"""memberwise serialization of {}
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TLeaf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class Model_TLeaf_v2(uproot.model.VersionedModel):
"""

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"""memberwise serialization of {}
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TList.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Model_TList(uproot.model.Model, Sequence):
"""

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"""memberwise serialization of {}
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TMatrixT.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class Model_TMatrixTSym_3c_double_3e__v5(uproot.model.VersionedModel):
"""

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"memberwise serialization of {}\nin file {}".format(
Expand Down
1 change: 1 addition & 0 deletions src/uproot/models/TNamed.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class Model_TNamed(uproot.model.Model):
"""

def read_members(self, chunk, cursor, context, file):
context["cancel_forth"] = True
if self.is_memberwise:
raise NotImplementedError(
"""memberwise serialization of {}
Expand Down
Loading

0 comments on commit ad31773

Please sign in to comment.