From b46c82946d6bd88b73164904834567b12aadf935 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 23 Feb 2024 10:05:56 -0500 Subject: [PATCH 1/2] RF: Consistently apply data type, shape and index order in GIFTI data blocks --- nibabel/gifti/parse_gifti_fast.py | 70 +++++++++++++------------------ 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/nibabel/gifti/parse_gifti_fast.py b/nibabel/gifti/parse_gifti_fast.py index af01dd544..ccd608324 100644 --- a/nibabel/gifti/parse_gifti_fast.py +++ b/nibabel/gifti/parse_gifti_fast.py @@ -68,21 +68,21 @@ def read_data_block(darray, fname, data, mmap): if mmap is True: mmap = 'c' enclabel = gifti_encoding_codes.label[darray.encoding] - dtype = data_type_codes.type[darray.datatype] + if enclabel not in ('ASCII', 'B64BIN', 'B64GZ', 'External'): + raise GiftiParseError(f'Unknown encoding {darray.encoding}') + + # Encode the endianness in the dtype + byteorder = gifti_endian_codes.byteorder[darray.endian] + dtype = data_type_codes.dtype[darray.datatype].newbyteorder(byteorder) + + shape = tuple(darray.dims) + order = array_index_order_codes.npcode[darray.ind_ord] + + # GIFTI_ENCODING_ASCII if enclabel == 'ASCII': - # GIFTI_ENCODING_ASCII - c = StringIO(data) - da = np.loadtxt(c, dtype=dtype) - # Reshape to dims specified in GiftiDataArray attributes, but preserve - # existing behaviour of loading as 1D for arrays with a dimension of - # length 1 - da = da.reshape(darray.dims).squeeze() - return da # independent of the endianness - elif enclabel not in ('B64BIN', 'B64GZ', 'External'): - return 0 - - # GIFTI_ENCODING_EXTBIN + return np.loadtxt(StringIO(data), dtype=dtype, ndmin=1).reshape(shape, order=order) + # We assume that the external data file is raw uncompressed binary, with # the data type/endianness/ordering specified by the other DataArray # attributes @@ -98,12 +98,13 @@ def read_data_block(darray, fname, data, mmap): newarr = None if mmap: try: - newarr = np.memmap( + return np.memmap( ext_fname, dtype=dtype, mode=mmap, offset=darray.ext_offset, - shape=tuple(darray.dims), + shape=shape, + order=order, ) # If the memmap fails, we ignore the error and load the data into # memory below @@ -111,13 +112,12 @@ def read_data_block(darray, fname, data, mmap): pass # mmap=False or np.memmap failed if newarr is None: - # We can replace this with a call to np.fromfile in numpy>=1.17, - # as an "offset" parameter was added in that version. - with open(ext_fname, 'rb') as f: - f.seek(darray.ext_offset) - nbytes = np.prod(darray.dims) * dtype().itemsize - buff = f.read(nbytes) - newarr = np.frombuffer(buff, dtype=dtype) + return np.fromfile( + ext_fname, + dtype=dtype, + count=np.prod(darray.dims), + offset=darray.ext_offset, + ).reshape(shape, order=order) # Numpy arrays created from bytes objects are read-only. # Neither b64decode nor decompress will return bytearrays, and there @@ -125,26 +125,14 @@ def read_data_block(darray, fname, data, mmap): # there is not a simple way to avoid making copies. # If this becomes a problem, we should write a decoding interface with # a tunable chunk size. + dec = base64.b64decode(data.encode('ascii')) + if enclabel == 'B64BIN': + buff = bytearray(dec) else: - dec = base64.b64decode(data.encode('ascii')) - if enclabel == 'B64BIN': - # GIFTI_ENCODING_B64BIN - buff = bytearray(dec) - else: - # GIFTI_ENCODING_B64GZ - buff = bytearray(zlib.decompress(dec)) - del dec - newarr = np.frombuffer(buff, dtype=dtype) - - sh = tuple(darray.dims) - if len(newarr.shape) != len(sh): - newarr = newarr.reshape(sh, order=array_index_order_codes.npcode[darray.ind_ord]) - - # check if we need to byteswap - required_byteorder = gifti_endian_codes.byteorder[darray.endian] - if required_byteorder in ('big', 'little') and required_byteorder != sys.byteorder: - newarr = newarr.byteswap() - return newarr + # GIFTI_ENCODING_B64GZ + buff = bytearray(zlib.decompress(dec)) + del dec + return np.frombuffer(buff, dtype=dtype).reshape(shape, order=order) def _str2int(in_str): From afbcc88d2c3ff83df3acadbff4741a790d2d5647 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 23 Feb 2024 10:08:22 -0500 Subject: [PATCH 2/2] TEST: Expect data arrays to be the advertised shapes --- nibabel/gifti/gifti.py | 2 +- nibabel/gifti/tests/test_parse_gifti_fast.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/nibabel/gifti/gifti.py b/nibabel/gifti/gifti.py index 76bad4677..7aba87730 100644 --- a/nibabel/gifti/gifti.py +++ b/nibabel/gifti/gifti.py @@ -745,7 +745,7 @@ def agg_data(self, intent_code=None): >>> triangles_2 = surf_img.agg_data('triangle') >>> triangles_3 = surf_img.agg_data(1009) # Numeric code for pointset >>> print(np.array2string(triangles)) - [0 1 2] + [[0 1 2]] >>> np.array_equal(triangles, triangles_2) True >>> np.array_equal(triangles, triangles_3) diff --git a/nibabel/gifti/tests/test_parse_gifti_fast.py b/nibabel/gifti/tests/test_parse_gifti_fast.py index 49f2729f3..f97242567 100644 --- a/nibabel/gifti/tests/test_parse_gifti_fast.py +++ b/nibabel/gifti/tests/test_parse_gifti_fast.py @@ -41,7 +41,16 @@ DATA_FILE7 = pjoin(IO_DATA_PATH, 'external.gii') DATA_FILE8 = pjoin(IO_DATA_PATH, 'ascii_flat_data.gii') -datafiles = [DATA_FILE1, DATA_FILE2, DATA_FILE3, DATA_FILE4, DATA_FILE5, DATA_FILE6, DATA_FILE7, DATA_FILE8] +datafiles = [ + DATA_FILE1, + DATA_FILE2, + DATA_FILE3, + DATA_FILE4, + DATA_FILE5, + DATA_FILE6, + DATA_FILE7, + DATA_FILE8, +] numDA = [2, 1, 1, 1, 2, 1, 2, 2] DATA_FILE1_darr1 = np.array( @@ -51,7 +60,7 @@ [-17.614349, -65.401642, 21.071466], ] ) -DATA_FILE1_darr2 = np.array([0, 1, 2]) +DATA_FILE1_darr2 = np.array([[0, 1, 2]]) DATA_FILE2_darr1 = np.array( [