Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor image utils #627

Merged
merged 2 commits into from
May 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker/development/Dockerfile.onnx-test
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ RUN umask 0 \
&& . /opt/miniconda3/bin/activate \
&& conda create -n nnabla-build python=${PYVERNAME} \
&& conda activate nnabla-build \
&& conda install -c conda-forge pydicom gdcm \
&& pip install numpy \
&& pip install -U -r /tmp/deps/setup_requirements.txt \
&& pip install -U -r /tmp/deps/requirements.txt \
Expand Down
1 change: 1 addition & 0 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ def extopts(library_name, library_dir):
'nnabla.utils.converter.tensorflow',
'nnabla.utils.factorization',
'nnabla.utils.image_utils',
'nnabla.utils.image_utils.backend_events',
'nnabla.utils.audio_utils',
'nnabla.backward_function',
'nnabla_ext',
Expand Down
103 changes: 2 additions & 101 deletions python/src/nnabla/utils/data_source_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,109 +255,13 @@ def load_image_imread(file, shape=None, max_range=1.0):
return img255 * (max_range / 255.0)


def load_image_pypng(file, shape=None, max_range=1.0):
    """Decode a PNG stream with pypng into a channel-first array.

    Pixel values are divided by the largest value representable at the
    file's bit depth. When the PNG carries an alpha channel (RGBA, or
    gray + alpha) the color channels are blended over the background color
    recorded in the PNG metadata, or over white if none is recorded.

    Args:
        file: File-like object handed to ``png.Reader``.
        shape: ``None`` to keep the decoded size. Otherwise a 3-element
            sequence ``(n_color, height, width)``; only ``height`` and
            ``width`` are used, as the target size given to ``imresize``.
        max_range: Factor the result is multiplied by. A negative value is
            replaced by 255.

    Returns:
        The image with the channel axis moved to the front.
    """
    import png

    reader = png.Reader(file=file)
    width, height, rows, info = reader.read()
    full_scale = 2 ** info['bitdepth'] - 1
    # (height, width, n_channel), scaled by the bit depth's maximum value.
    samples = numpy.array(list(rows), dtype=numpy.float32)
    img = samples.reshape((height, width, -1)) / full_scale

    has_alpha = info['alpha']
    planes = info['planes']

    # Number of color channels that precede the alpha channel, or None when
    # there is nothing to blend (plain RGB / gray).
    n_color = None
    if has_alpha:
        if planes == 4:  # RGBA
            n_color = 3
        elif planes == 2:  # (gray, alpha)
            n_color = 1

    if n_color is not None:
        # TODO: the alpha-blending path is not well tested.
        try:
            background = numpy.array(info['background']) / full_scale
        except KeyError:
            background = numpy.ones(n_color)  # default to white
        color = img[:, :, :n_color]
        alpha = img[:, :, n_color]
        plane_shape = alpha.shape
        blended_bg = numpy.outer(1 - alpha, background).reshape(
            plane_shape + (n_color,))
        # The trailing singleton axis broadcasts alpha over every color
        # channel.
        img = blended_bg + alpha.reshape(plane_shape + (1,)) * color

    # Reshape image
    if max_range < 0:
        max_range = 255
    if shape is None:
        return img.transpose(2, 0, 1) * max_range

    _, out_height, out_width = shape
    resized = imresize(img, (out_height, out_width))
    return resized.transpose((2, 0, 1)) * max_range / 255.0


def load_image_cv2(file, shape=None, max_range=1.0):
    """Decode an image stream with OpenCV into a channel-first array.

    The stream is decoded with ``cv2.IMREAD_UNCHANGED``. A 2-D result gets
    a leading channel axis of length 1; 3- and 4-channel results are
    reordered from OpenCV's BGR(A) to RGB(A) and transposed so the channel
    axis comes first.

    Args:
        file: File-like object whose ``read()`` returns the encoded image
            bytes.
        shape: Not used by this loader; kept so every loader shares the
            same signature.
        max_range: Selects the output scaling.

            * negative: decoded values are returned untouched.
            * ``255``: uint16 data is reduced to uint8 (divided by 256).
            * ``65535``: uint8 data is widened to uint16 (multiplied by
              256).
            * anything else: uint8 / uint16 data is converted to float32
              and scaled so that the dtype's maximum maps to
              ``max_range``.

            Arrays of any other dtype are returned as decoded.

    Returns:
        numpy.ndarray: The decoded image, channel axis first.

    Raises:
        ValueError: If OpenCV cannot decode the data.
    """
    encoded = numpy.asarray(bytearray(file.read()), dtype=numpy.uint8)
    img = cv2.imdecode(encoded, cv2.IMREAD_UNCHANGED)
    if img is None:
        # imdecode signals failure by returning None instead of raising;
        # fail here with a clear message rather than on `img.shape` below.
        raise ValueError('cv2.imdecode could not decode the image data.')

    if img.ndim == 2:  # gray image
        img = img.reshape((1,) + img.shape)
    elif img.ndim == 3:  # color image
        if img.shape[2] == 3:
            img = img[:, :, ::-1].copy()  # BGR to RGB
            img = img.transpose(2, 0, 1)
        elif img.shape[2] == 4:
            img = img.transpose(2, 0, 1)  # BGRA to RGBA
            img = numpy.array([img[2], img[1], img[0], img[3]])

    if max_range < 0:
        return img

    if max_range == 255:
        if img.dtype == numpy.uint16:
            img = (img // 256).astype(numpy.uint8)
    elif max_range == 65535:
        if img.dtype == numpy.uint8:
            # Widen BEFORE multiplying: `uint8_array * 256` cannot hold the
            # product in uint8 and raises OverflowError under NumPy 2
            # promotion rules.
            img = img.astype(numpy.uint16) * 256
    elif img.dtype == numpy.uint8:
        img = numpy.float32(img) * max_range / 255.0
    elif img.dtype == numpy.uint16:
        img = numpy.float32(img) * max_range / 65535.0
    return img


def load_image(file, shape=None, normalize=False):
    """Load an image from a file object with the best available decoder.

    Decoder selection:

    1. ``load_image_cv2`` when ``cv2_available`` is true.
    2. ``load_image_pypng`` for ``.png`` files whose bit depth exceeds 8,
       when ``pypng_available`` is true.
    3. ``load_image_imread`` otherwise.

    Args:
        file: File-like object. Its ``name`` attribute, when present, is
            used to detect the ``.png`` extension.
        shape: Passed through to the selected loader.
        normalize: If true the loader is called with ``max_range=1.0``,
            otherwise with ``max_range=-1``.

    Returns:
        Whatever the selected loader returns.
    """
    max_range = 1.0 if normalize else -1

    if cv2_available:
        return load_image_cv2(file, shape, max_range)

    try:
        ext = os.path.splitext(file.name)[1].lower()
    except Exception:
        # Best effort only: streams without a usable `name` simply skip the
        # PNG special case. `Exception` (rather than a bare `except`) keeps
        # KeyboardInterrupt / SystemExit propagating.
        ext = None

    if ext == '.png' and pypng_available:
        # Peek at the header to learn the bit depth, then rewind so the
        # chosen loader reads the stream from the start.
        _, _, _, metadata = png.Reader(file=file).read()
        file.seek(0)
        if metadata['bitdepth'] > 8:  # if png with high bitdepth
            return load_image_pypng(file, shape, max_range)
    return load_image_imread(file, shape, max_range)


def load_csv(file, shape=None, normalize=False):
Expand Down Expand Up @@ -430,15 +334,12 @@ def load_audio(file, shape=None, normalize=False):
'.gif': load_image,
'.tif': load_image,
'.tiff': load_image,
'.dcm': load_image,
'.csv': load_csv,
'.npy': load_npy,
'.wav': load_audio}


def register_load_function(ext, function):
    """Register ``function`` as the loader for extension ``ext``.

    The entry is stored in the module-level ``_load_functions`` table; an
    existing entry for the same extension is overwritten.
    """
    _load_functions.update({ext: function})


def load(ext):
import nnabla.utils.callback as callback
func = callback.get_load_image_func(ext)
Expand Down
19 changes: 11 additions & 8 deletions python/src/nnabla/utils/image_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import numpy as np

from .backend_manager import backend_manager
from .common import rescale_pixel_intensity
from .backend_events.common import rescale_pixel_intensity


def set_backend(backend):
Expand All @@ -29,7 +29,7 @@ def set_backend(backend):
backend (str): the name of the `image_utils` backend
"""

backend_manager.backend = backend
backend_manager.set_backend(backend)


def get_backend():
Expand All @@ -40,7 +40,7 @@ def get_backend():
str
"""

return backend_manager.backend
return backend_manager.get_backend()


def get_available_backends():
Expand Down Expand Up @@ -111,9 +111,10 @@ def imread(path, grayscale=False, size=None, interpolate="bilinear",
if as_uint16=True output dtype is np.uint16, else np.uint8 (default).
"""

return backend_manager.module.imread(path, grayscale=grayscale, size=size, interpolate=interpolate,
channel_first=channel_first, as_uint16=as_uint16, num_channels=num_channels,
**kwargs)
best_backend = backend_manager.get_best_backend(path, "load")
return best_backend.imread(path, grayscale=grayscale, size=size, interpolate=interpolate,
channel_first=channel_first, as_uint16=as_uint16, num_channels=num_channels,
**kwargs)


def imsave(path, img, channel_first=False, as_uint16=False, auto_scale=True, **kwargs):
Expand All @@ -137,7 +138,8 @@ def imsave(path, img, channel_first=False, as_uint16=False, auto_scale=True, **k
The range of upscaled pixel values depends on output dtype, which is [0, 255] as uint8 and [0, 65535] as uint16.
"""

backend_manager.module.imsave(
best_backend = backend_manager.get_best_backend(path, "save")
best_backend.imsave(
path, img, channel_first=channel_first, as_uint16=as_uint16, auto_scale=auto_scale, **kwargs)


Expand All @@ -162,7 +164,8 @@ def imresize(img, size, interpolate="bilinear", channel_first=False, **kwargs):
numpy.ndarray
"""

return backend_manager.module.imresize(img, size, interpolate=interpolate, channel_first=channel_first, **kwargs)
best_backend = backend_manager.get_best_backend(img, "resize")
return best_backend.imresize(img, size, interpolate=interpolate, channel_first=channel_first, **kwargs)


# alias
Expand Down
13 changes: 13 additions & 0 deletions python/src/nnabla/utils/image_utils/backend_events/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2020 Sony Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017 Sony Corporation. All Rights Reserved.
# Copyright (c) 2020 Sony Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
Loading