Skip to content

Commit

Permalink
Merge pull request #1 from effigies/mnt/reshape-gifti-ascii-data
Browse files Browse the repository at this point in the history
RF: Consistently apply data type, shape and index order
  • Loading branch information
pauldmccarthy authored Feb 26, 2024
2 parents 6ffeeac + afbcc88 commit 5bcf012
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 44 deletions.
2 changes: 1 addition & 1 deletion nibabel/gifti/gifti.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ def agg_data(self, intent_code=None):
>>> triangles_2 = surf_img.agg_data('triangle')
>>> triangles_3 = surf_img.agg_data(1009) # Numeric code for triangle
>>> print(np.array2string(triangles))
[0 1 2]
[[0 1 2]]
>>> np.array_equal(triangles, triangles_2)
True
>>> np.array_equal(triangles, triangles_3)
Expand Down
70 changes: 29 additions & 41 deletions nibabel/gifti/parse_gifti_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,21 +68,21 @@ def read_data_block(darray, fname, data, mmap):
if mmap is True:
mmap = 'c'
enclabel = gifti_encoding_codes.label[darray.encoding]
dtype = data_type_codes.type[darray.datatype]

if enclabel not in ('ASCII', 'B64BIN', 'B64GZ', 'External'):
raise GiftiParseError(f'Unknown encoding {darray.encoding}')

# Encode the endianness in the dtype
byteorder = gifti_endian_codes.byteorder[darray.endian]
dtype = data_type_codes.dtype[darray.datatype].newbyteorder(byteorder)

shape = tuple(darray.dims)
order = array_index_order_codes.npcode[darray.ind_ord]

# GIFTI_ENCODING_ASCII
if enclabel == 'ASCII':
# GIFTI_ENCODING_ASCII
c = StringIO(data)
da = np.loadtxt(c, dtype=dtype)
# Reshape to dims specified in GiftiDataArray attributes, but preserve
# existing behaviour of loading as 1D for arrays with a dimension of
# length 1
da = da.reshape(darray.dims).squeeze()
return da # independent of the endianness
elif enclabel not in ('B64BIN', 'B64GZ', 'External'):
return 0

# GIFTI_ENCODING_EXTBIN
return np.loadtxt(StringIO(data), dtype=dtype, ndmin=1).reshape(shape, order=order)

# We assume that the external data file is raw uncompressed binary, with
# the data type/endianness/ordering specified by the other DataArray
# attributes
Expand All @@ -98,53 +98,41 @@ def read_data_block(darray, fname, data, mmap):
newarr = None
if mmap:
try:
newarr = np.memmap(
return np.memmap(
ext_fname,
dtype=dtype,
mode=mmap,
offset=darray.ext_offset,
shape=tuple(darray.dims),
shape=shape,
order=order,
)
# If the memmap fails, we ignore the error and load the data into
# memory below
except (AttributeError, TypeError, ValueError):
pass
# mmap=False or np.memmap failed
if newarr is None:
# We can replace this with a call to np.fromfile in numpy>=1.17,
# as an "offset" parameter was added in that version.
with open(ext_fname, 'rb') as f:
f.seek(darray.ext_offset)
nbytes = np.prod(darray.dims) * dtype().itemsize
buff = f.read(nbytes)
newarr = np.frombuffer(buff, dtype=dtype)
return np.fromfile(
ext_fname,
dtype=dtype,
count=np.prod(darray.dims),
offset=darray.ext_offset,
).reshape(shape, order=order)

# Numpy arrays created from bytes objects are read-only.
# Neither b64decode nor decompress will return bytearrays, and there
# are no equivalents to fobj.readinto that would allow us to pass them
# in, so there is no simple way to avoid making copies.
# If this becomes a problem, we should write a decoding interface with
# a tunable chunk size.
dec = base64.b64decode(data.encode('ascii'))
if enclabel == 'B64BIN':
buff = bytearray(dec)
else:
dec = base64.b64decode(data.encode('ascii'))
if enclabel == 'B64BIN':
# GIFTI_ENCODING_B64BIN
buff = bytearray(dec)
else:
# GIFTI_ENCODING_B64GZ
buff = bytearray(zlib.decompress(dec))
del dec
newarr = np.frombuffer(buff, dtype=dtype)

sh = tuple(darray.dims)
if len(newarr.shape) != len(sh):
newarr = newarr.reshape(sh, order=array_index_order_codes.npcode[darray.ind_ord])

# check if we need to byteswap
required_byteorder = gifti_endian_codes.byteorder[darray.endian]
if required_byteorder in ('big', 'little') and required_byteorder != sys.byteorder:
newarr = newarr.byteswap()
return newarr
# GIFTI_ENCODING_B64GZ
buff = bytearray(zlib.decompress(dec))
del dec
return np.frombuffer(buff, dtype=dtype).reshape(shape, order=order)


def _str2int(in_str):
Expand Down
13 changes: 11 additions & 2 deletions nibabel/gifti/tests/test_parse_gifti_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,16 @@
DATA_FILE7 = pjoin(IO_DATA_PATH, 'external.gii')
DATA_FILE8 = pjoin(IO_DATA_PATH, 'ascii_flat_data.gii')

datafiles = [DATA_FILE1, DATA_FILE2, DATA_FILE3, DATA_FILE4, DATA_FILE5, DATA_FILE6, DATA_FILE7, DATA_FILE8]
datafiles = [
DATA_FILE1,
DATA_FILE2,
DATA_FILE3,
DATA_FILE4,
DATA_FILE5,
DATA_FILE6,
DATA_FILE7,
DATA_FILE8,
]
numDA = [2, 1, 1, 1, 2, 1, 2, 2]

DATA_FILE1_darr1 = np.array(
Expand All @@ -51,7 +60,7 @@
[-17.614349, -65.401642, 21.071466],
]
)
DATA_FILE1_darr2 = np.array([0, 1, 2])
DATA_FILE1_darr2 = np.array([[0, 1, 2]])

DATA_FILE2_darr1 = np.array(
[
Expand Down

0 comments on commit 5bcf012

Please sign in to comment.