Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

fix: pandas and double nested vectors issue 885 #912

Merged
merged 11 commits into from
Jul 7, 2023
14 changes: 8 additions & 6 deletions src/uproot/interpretation/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,14 +841,16 @@ def finalize(self, array, branch, interpretation, entry_start, entry_stop, optio
):
return pandas.Series(array, index=index)
else:
awkward_pandas = uproot.extras.awkward_pandas()
ak_lib = _libraries[Awkward.name]
ak_arr = ak_lib.finalize(
array = _libraries[Awkward.name].finalize(
array, branch, interpretation, entry_start, entry_stop, options
)
return pandas.Series(
awkward_pandas.AwkwardExtensionArray(ak_arr), index=index
)
if isinstance(
array.type.content, uproot.extras.awkward().types.NumpyType
) and array.layout.minmax_depth == (1, 1):
array = array.to_numpy()
else:
array = uproot.extras.awkward_pandas().AwkwardExtensionArray(array)
return pandas.Series(array, index=index)

def group(self, arrays, expression_context, how):
pandas = self.imported
Expand Down
19 changes: 16 additions & 3 deletions src/uproot/interpretation/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,15 @@ def basket_array(
)
assert basket.byte_offsets is not None

if self._forth and isinstance(library, uproot.interpretation.library.Awkward):
if self._forth and (
isinstance(
library,
(
uproot.interpretation.library.Awkward,
uproot.interpretation.library.Pandas,
),
)
):
output = self.basket_array_forth(
data,
byte_offsets,
Expand Down Expand Up @@ -403,9 +411,14 @@ def final_array(
output = numpy.array([], dtype=self.numpy_dtype)
elif all(
uproot._util.from_module(x, "awkward") for x in basket_arrays.values()
) and isinstance(
library,
(
uproot.interpretation.library.Awkward,
uproot.interpretation.library.Pandas,
),
):
assert isinstance(library, uproot.interpretation.library.Awkward)
awkward = library.imported
awkward = uproot.extras.awkward()
output = awkward.concatenate(trimmed, mergebool=False, highlevel=False)
else:
output = numpy.concatenate(trimmed)
Expand Down
3 changes: 2 additions & 1 deletion tests/test_0910-fix_906_members_non_numerical_branches.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import uproot
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE

import uproot
from skhep_testdata import data_path


Expand Down
54 changes: 54 additions & 0 deletions tests/test_0912-fix-pandas-and-double-nested-vectors-issue-885.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE

import uproot
import pytest
import skhep_testdata, os
import numpy as np
import awkward as ak

ROOT = pytest.importorskip("ROOT")


def test_pandas_and_double_nested_vectors_issue_885(tmp_path):
    """Regression test for issue 885: vector branches read with ``library="pd"``.

    Writes a small TTree with PyROOT containing a doubly nested
    ``std::vector<std::vector<double>>`` branch and two
    ``std::vector<unsigned int>`` branches, then reads all three back through
    uproot into pandas and checks that each cell is an Awkward Array holding
    the values that were written.

    Args:
        tmp_path: pytest-provided temporary directory for the ROOT file.
    """
    filename = os.path.join(
        tmp_path, "uproot_test_pandas_and_double_nested_vectors.root"
    )
    f = ROOT.TFile(filename, "recreate")
    t = ROOT.TTree("mytree", "example tree")

    vec1 = ROOT.std.vector("double")()
    vec2 = ROOT.std.vector("double")()
    vec_vec = ROOT.std.vector(ROOT.std.vector("double"))()

    for i in range(3):
        vec1.push_back(i)
    for i in range(5):
        vec2.push_back(i)

    # Two inner vectors of different lengths make the outer vector jagged.
    vec_vec.push_back(vec1)
    vec_vec.push_back(vec2)

    a = np.array([1, 2, 3, 4], dtype=np.uint32)
    avec = ROOT.std.vector("unsigned int")(a)

    # A 3-D NumPy array is handed to a flat std::vector; the final assertion
    # below expects its nine values to come back as one flat list.
    b = np.array([[[0, 1, 3], [4, 5, 6], [7, 8, 9]]], dtype=np.uint32)
    bvec = ROOT.std.vector("unsigned int")(b)

    t.Branch("2Dvector", vec_vec)
    t.Branch("1Dvector", avec)
    t.Branch("othervector", bvec)

    # Every entry stores the same vector contents.
    nentries = 25
    for _ in range(nentries):
        t.Fill()

    f.Write()
    # Release the ROOT file handle before uproot reopens the same path.
    f.Close()

    with uproot.open(filename)["mytree"] as fs:
        u = fs.arrays(["2Dvector", "1Dvector", "othervector"], library="pd")
        assert isinstance(u["2Dvector"][0], ak.highlevel.Array)
        assert isinstance(u["1Dvector"][0], ak.highlevel.Array)
        assert isinstance(u["othervector"][0], ak.highlevel.Array)
        assert ak.to_list(u["2Dvector"][0]) == [[0, 1, 2], [0, 1, 2, 3, 4]]
        assert ak.to_list(u["1Dvector"][0]) == [1, 2, 3, 4]
        assert ak.to_list(u["othervector"][0]) == [0, 1, 3, 4, 5, 6, 7, 8, 9]