Skip to content

Commit

Permalink
added support for determining completeness of bytes or BytesIO object…
Browse files Browse the repository at this point in the history
…s rather than just file names; added is_XYZ methods that determine whether a file/bytes/BytesIO represents file type XYZ
  • Loading branch information
fracpete committed Mar 29, 2023
1 parent 560ae6a commit 91e4d88
Show file tree
Hide file tree
Showing 18 changed files with 423 additions and 95 deletions.
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Changelog
=========

0.0.4 (2023-03-30)
------------------

- added support for determining completeness of bytes or BytesIO objects rather than just file names
- added `is_XYZ` methods that determine whether a file/bytes/BytesIO represents file type `XYZ`


0.0.3 (2023-03-28)
------------------

Expand Down
1 change: 1 addition & 0 deletions DESCRIPTION.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ The **python-image-complete** package allows you to check for various
image types to check whether the image is complete or not. For doing
this, it looks for EOF (end of file) markers in the files or compares
the stored file length against the actual file length.
Can also operate on bytes or BytesIO objects.

Supported file formats:

Expand Down
20 changes: 17 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# python-image-complete

Python 3 library for checking whether an image is complete or not.
It is either looking for EOF markers or checking the length of the file against one stored in file.
It is either looking for EOF markers or checking the length of the file against one stored in the file.
Can also operate on bytes or BytesIO objects.


## Supported image formats

Expand All @@ -10,6 +13,7 @@ It is either looking for EOF markers or checking the length of the file against
* PNG (extension: .png)
* WebP (extension: .webp)


## File structures

* BMP (checks file length)
Expand Down Expand Up @@ -42,14 +46,24 @@ It is either looking for EOF markers or checking the length of the file against
```python
from image_complete.auto import is_image_complete

# using file names
print(is_image_complete("/some/where/hello_world.jpg"))
print(is_image_complete("/some/where/image.png"))

# using bytes or BytesIO
with open("/some/where/image.bmp", "rb") as fp:
b = fp.read()
print(is_image_complete(b))
```

* JPG specific

```python
from image_complete.jpg import is_jpg_complete
from image_complete.jpg import is_jpg_complete, is_jpg

print(is_jpg_complete("/some/where/hello_world.jpg"))
f = "/some/where/hello_world.jpg"
if is_jpg(f):
print(is_jpg_complete(f))
else:
print("Not a JPG!")
```
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def _read(f):
packages=[
"image_complete",
],
version="0.0.3",
version="0.0.4",
author='Peter Reutemann',
author_email='fracpete@waikato.ac.nz',
)
61 changes: 41 additions & 20 deletions src/image_complete/auto.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,52 @@
from image_complete.bmp import is_bmp_complete
from image_complete.gif import is_gif_complete
from image_complete.jpg import is_jpg_complete
from image_complete.png import is_png_complete
from image_complete.webp import is_webp_complete
from io import BytesIO

from image_complete.bmp import is_bmp_complete, is_bmp
from image_complete.gif import is_gif_complete, is_gif
from image_complete.jpg import is_jpg_complete, is_jpg
from image_complete.png import is_png_complete, is_png
from image_complete.webp import is_webp_complete, is_webp

def is_image_complete(img_path):

def is_image_complete(img):
"""
Checks whether the image is complete. Auto-detects the type based on extension.
If the type is not supported, it will throw an exception.
:param img_path: the absolute path to the JPG image
:type img_path: str
:param img: the absolute path to the image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:return: True if complete
:rtype: bool
"""
if isinstance(img, bytes):
img = BytesIO(img)

if isinstance(img, str):
name = img.lower()
if name.endswith(".gif"):
return is_gif_complete(img)
elif name.endswith(".jpg") or name.endswith(".jpeg"):
return is_jpg_complete(img)
elif name.endswith(".png"):
return is_png_complete(img)
elif name.endswith(".bmp"):
return is_bmp_complete(img)
elif name.endswith(".webp"):
return is_webp_complete(img)
else:
raise Exception("Unsupported file type: " + img)

name = img_path.lower()
if name.endswith(".gif"):
return is_gif_complete(img_path)
elif name.endswith(".jpg") or name.endswith(".jpeg"):
return is_jpg_complete(img_path)
elif name.endswith(".png"):
return is_png_complete(img_path)
elif name.endswith(".bmp"):
return is_bmp_complete(img_path)
elif name.endswith(".webp"):
return is_webp_complete(img_path)
elif isinstance(img, BytesIO):
if is_bmp(img):
return is_bmp_complete(img)
elif is_gif(img):
return is_gif_complete(img)
elif is_jpg(img):
return is_jpg_complete(img)
elif is_png(img):
return is_png_complete(img)
elif is_webp(img):
return is_webp_complete(img)
else:
raise Exception("Failed to determine file type!")
else:
raise Exception("Unsupported file type: " + img_path)
raise Exception("Unsupported data type: %s" % str(type(img)))
26 changes: 26 additions & 0 deletions src/image_complete/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os

from io import BytesIO


def load(img):
    """
    Loads the data and returns it as BytesIO object and the associated length.
    :param img: the image to load: a file name (str or os.PathLike), raw
        bytes/bytearray data, or a BytesIO object
    :type img: str or os.PathLike or bytes or bytearray or BytesIO
    :return: tuple of BytesIO wrapper and length in bytes; (None, None) if
        the data type is not supported. A BytesIO argument is returned as-is
        (not copied).
    :rtype: tuple
    """
    if isinstance(img, (bytes, bytearray)):
        img = BytesIO(img)
    if isinstance(img, BytesIO):
        # Release the buffer export right away: while a view from getbuffer()
        # is alive the BytesIO object cannot be resized or closed.
        with img.getbuffer() as view:
            return img, view.nbytes
    if isinstance(img, (str, os.PathLike)):
        with open(img, "rb") as f:
            content = f.read()
        # Take the length from the bytes actually read, so the wrapper and
        # the reported length cannot disagree if the file changes on disk.
        return BytesIO(content), len(content)
    print("Unhandled data type: %s" % str(type(img)))
    return None, None
46 changes: 34 additions & 12 deletions src/image_complete/bmp.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,49 @@
import os
import struct

from io import BytesIO
from .base import load

def is_bmp_complete(img_path):

def is_bmp(img):
    """
    Checks whether the image represents a bitmap, based on the two-byte
    signature at the start of the data.
    https://en.wikipedia.org/wiki/BMP_file_format#File_structure
    :param img: the absolute path to the BMP image or a bytes/BytesIO object
    :type img: str or bytes or BytesIO
    :return: True if a bitmap; False otherwise (including unsupported data types)
    :rtype: bool
    """
    data, _ = load(img)
    if data is None:
        # load() signals an unsupported data type with (None, None)
        return False
    # load() only hands back an open in-memory buffer, so seek/read cannot
    # fail here; a short read simply does not match any signature.
    data.seek(0)
    return data.read(2) in (b"BM", b"BA", b"CI", b"CP", b"IC", b"PT")


def is_bmp_complete(img):
"""
Checks whether the BMP image is complete.
https://en.wikipedia.org/wiki/BMP_file_format#File_structure
:param img_path: the absolute path to the BMP image
:type img_path: str
:param img: the absolute path to the BMP image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:return: True if complete
:rtype: bool
"""

try:
flen = os.path.getsize(img_path)
if flen > 6:
with open(img_path, "rb") as f:
f.seek(2, 0)
data = f.read(4)
blen = struct.unpack('I', data)
return blen[0] == flen
data, data_len = load(img)
if data is None:
return False
if data_len > 6:
data.seek(2, 0)
data = data.read(4)
blen = struct.unpack('I', data)
return blen[0] == data_len
else:
return False
except:
Expand Down
43 changes: 32 additions & 11 deletions src/image_complete/gif.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,46 @@
import os
from io import BytesIO
from .base import load


def is_gif_complete(img_path):
def is_gif(img):
    """
    Checks whether the image represents a GIF, based on the six-byte
    signature/version header (either GIF87a or GIF89a).
    https://en.wikipedia.org/wiki/GIF#File_format
    :param img: the absolute path to the GIF image or a bytes/BytesIO object
    :type img: str or bytes or BytesIO
    :return: True if a GIF; False otherwise (including unsupported data types)
    :rtype: bool
    """
    data, _ = load(img)
    if data is None:
        # load() signals an unsupported data type with (None, None)
        return False
    # load() only hands back an open in-memory buffer, so seek/read cannot
    # fail here; a short read simply does not match either header.
    data.seek(0)
    # Both format versions are valid GIFs; older files still use GIF87a.
    return data.read(6) in (b"GIF87a", b"GIF89a")


def is_gif_complete(img):
"""
Checks whether the GIF image is complete.
https://en.wikipedia.org/wiki/GIF#File_format
:param img_path: the absolute path to the GIF image
:type img_path: str
:param img: the absolute path to the GIF image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:return: True if complete
:rtype: bool
"""

try:
flen = os.path.getsize(img_path)
if flen > 1:
with open(img_path, "rb") as f:
f.seek(flen - 1, 0)
marker = f.read(1)
return marker[0] == 59
data, data_len = load(img)
if data is None:
return False
if data_len > 1:
data.seek(data_len - 1, 0)
marker = data.read(1)
return marker[0] == 59
else:
return False
except:
Expand Down
47 changes: 33 additions & 14 deletions src/image_complete/jpg.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,46 @@
import os
from io import BytesIO
from .base import load


def is_jpg_complete(img_path):
def is_jpg(img):
    """
    Checks whether the image represents a JPG, based on the two-byte
    start-of-image marker (0xFF 0xD8) at the start of the data.
    http://en.wikipedia.org/wiki/JPEG#Syntax_and_structure
    :param img: the absolute path to the JPG image or a bytes/BytesIO object
    :type img: str or bytes or BytesIO
    :return: True if a JPG; False otherwise (including unsupported data types)
    :rtype: bool
    """
    data, _ = load(img)
    if data is None:
        # load() signals an unsupported data type with (None, None)
        return False
    # load() only hands back an open in-memory buffer, so seek/read cannot
    # fail here. Comparing the whole slice (rather than indexing single
    # bytes) also handles data shorter than two bytes without an exception.
    data.seek(0)
    return data.read(2) == b"\xff\xd8"


def is_jpg_complete(img):
"""
Checks whether the JPG image is complete.
https://en.wikipedia.org/wiki/Portable_Network_Graphics#Critical_chunks
http://www.libpng.org/pub/png/spec/1.2/PNG-Structure.html#Chunk-layout
http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.IEND
http://en.wikipedia.org/wiki/JPEG#Syntax_and_structure
:param img_path: the absolute path to the PNG image
:type img_path: str
:param img: the absolute path to the JPG image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:return: True if complete
:rtype: bool
"""

try:
flen = os.path.getsize(img_path)
if flen > 2:
with open(img_path, "rb") as f:
f.seek(flen - 2, 0)
marker = f.read(2)
return (marker[0] == 0xFF) and (marker[1] == 0xD9)
data, data_len = load(img)
if data is None:
return False
if data_len > 2:
data.seek(data_len - 2, 0)
marker = data.read(2)
return (marker[0] == 0xFF) and (marker[1] == 0xD9)
else:
return False
except:
Expand Down
Loading

0 comments on commit 91e4d88

Please sign in to comment.