Skip to content

Commit

Permalink
added ability to operate in lenient mode, ie tolerating trailing junk…
Browse files Browse the repository at this point in the history
… data
  • Loading branch information
fracpete committed Apr 17, 2023
1 parent 91e4d88 commit 90e2c52
Show file tree
Hide file tree
Showing 21 changed files with 303 additions and 58 deletions.
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Changelog
=========

0.0.5 (2023-04-17)
------------------

- added support for *lenient mode* via `strict` and `check_size` parameters


0.0.4 (2023-03-30)
------------------

Expand Down
72 changes: 47 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

Python 3 library for checking whether an image is complete or not.
It is either looking for EOF markers or checking the length of the file against one stored in the file.
Can also operate on bytes or BytesIO objects.

Can also operate on bytes or BytesIO objects.

By default, the library operates in **strict** mode, i.e., no trailing junk data
is tolerated. However, by supplying the parameters `strict` and `check_size`, this
can be turned into **lenient** mode. `check_size` is only used for the formats gif, jpg, and png.


## Supported image formats
Expand Down Expand Up @@ -41,29 +46,46 @@ Can also operate on bytes or BytesIO objects.

## Examples

* auto detection
### Auto detection

```python
from image_complete.auto import is_image_complete
```python
from image_complete.auto import is_image_complete

# using file names
print(is_image_complete("/some/where/hello_world.jpg"))
print(is_image_complete("/some/where/image.png"))

# using bytes or BytesIO
with open("/some/where/image.bmp", "rb") as fp:
b = fp.read()
print(is_image_complete(b))
```

* JPG specific

```python
from image_complete.jpg import is_jpg_complete, is_jpg

f = "/some/where/hello_world.jpg"
if is_jpg(f):
print(is_jpg_complete(f))
else:
print("Not a JPG!")
```
# using file names
print(is_image_complete("/some/where/hello_world.jpg"))
print(is_image_complete("/some/where/image.png"))

# using bytes or BytesIO
with open("/some/where/image.bmp", "rb") as fp:
b = fp.read()
print(is_image_complete(b))
```


### JPG specific

```python
from image_complete.jpg import is_jpg_complete, is_jpg

f = "/some/where/hello_world.jpg"
if is_jpg(f):
print(is_jpg_complete(f))
else:
print("Not a JPG!")
```


### Lenient mode (i.e., tolerating trailing junk data)

```python
from image_complete.auto import is_image_complete

# using file names
print(is_image_complete("/some/where/hello_world.jpg", strict=False, check_size=100))
print(is_image_complete("/some/where/image.png", strict=False, check_size=100))

# using bytes or BytesIO
with open("/some/where/image.bmp", "rb") as fp:
b = fp.read()
print(is_image_complete(b, strict=False))
```
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def _read(f):
packages=[
"image_complete",
],
version="0.0.4",
version="0.0.5",
author='Peter Reutemann',
author_email='fracpete@waikato.ac.nz',
)
31 changes: 18 additions & 13 deletions src/image_complete/auto.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
from io import BytesIO

from image_complete.base import DEFAULT_CHECK_SIZE
from image_complete.bmp import is_bmp_complete, is_bmp
from image_complete.gif import is_gif_complete, is_gif
from image_complete.jpg import is_jpg_complete, is_jpg
from image_complete.png import is_png_complete, is_png
from image_complete.webp import is_webp_complete, is_webp


def is_image_complete(img):
def is_image_complete(img, strict=True, check_size=DEFAULT_CHECK_SIZE):
"""
Checks whether the image is complete. Auto-detects the type based on extension.
If the type is not supported, it will throw an exception.
:param img: the absolute path to the image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:param strict: if True then no junk data after actual data is allowed
:type strict: bool
:param check_size: the number of bytes from the end of the file to look for EOF marker (only used by: gif, jpg, png)
:type check_size: int
:return: True if complete
:rtype: bool
"""
Expand All @@ -22,30 +27,30 @@ def is_image_complete(img):

if isinstance(img, str):
name = img.lower()
if name.endswith(".gif"):
return is_gif_complete(img)
if name.endswith(".bmp"):
return is_bmp_complete(img, strict=strict)
elif name.endswith(".gif"):
return is_gif_complete(img, strict=strict, check_size=check_size)
elif name.endswith(".jpg") or name.endswith(".jpeg"):
return is_jpg_complete(img)
return is_jpg_complete(img, strict=strict, check_size=check_size)
elif name.endswith(".png"):
return is_png_complete(img)
elif name.endswith(".bmp"):
return is_bmp_complete(img)
return is_png_complete(img, strict=strict, check_size=check_size)
elif name.endswith(".webp"):
return is_webp_complete(img)
return is_webp_complete(img, strict=strict)
else:
raise Exception("Unsupported file type: " + img)

elif isinstance(img, BytesIO):
if is_bmp(img):
return is_bmp_complete(img)
return is_bmp_complete(img, strict=strict)
elif is_gif(img):
return is_gif_complete(img)
return is_gif_complete(img, strict=strict, check_size=check_size)
elif is_jpg(img):
return is_jpg_complete(img)
return is_jpg_complete(img, strict=strict, check_size=check_size)
elif is_png(img):
return is_png_complete(img)
return is_png_complete(img, strict=strict, check_size=check_size)
elif is_webp(img):
return is_webp_complete(img)
return is_webp_complete(img, strict=strict)
else:
raise Exception("Failed to determine file type!")
else:
Expand Down
4 changes: 4 additions & 0 deletions src/image_complete/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
from io import BytesIO


DEFAULT_CHECK_SIZE = 100
""" the buffer size used to look for the EOF marker. """


def load(img):
"""
Loads the data and returns it as BytesIO object and the associated length.
Expand Down
9 changes: 7 additions & 2 deletions src/image_complete/bmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,16 @@ def is_bmp(img):
return False


def is_bmp_complete(img):
def is_bmp_complete(img, strict=True):
"""
Checks whether the BMP image is complete.
https://en.wikipedia.org/wiki/BMP_file_format#File_structure
:param img: the absolute path to the BMP image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:param strict: if True then no junk data after actual data is allowed
:type strict: bool
:return: True if complete
:rtype: bool
"""
Expand All @@ -43,7 +45,10 @@ def is_bmp_complete(img):
data.seek(2, 0)
data = data.read(4)
blen = struct.unpack('I', data)
return blen[0] == data_len
if strict:
return blen[0] == data_len
else:
return blen[0] <= data_len
else:
return False
except:
Expand Down
24 changes: 19 additions & 5 deletions src/image_complete/gif.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from io import BytesIO
from .base import load
from .base import load, DEFAULT_CHECK_SIZE


def is_gif(img):
Expand All @@ -22,14 +22,18 @@ def is_gif(img):
return False


def is_gif_complete(img):
def is_gif_complete(img, strict=True, check_size=DEFAULT_CHECK_SIZE):
"""
Checks whether the GIF image is complete.
https://en.wikipedia.org/wiki/GIF#File_format
:param img: the absolute path to the GIF image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:param strict: if True then no junk data after actual data is allowed
:type strict: bool
:param check_size: the number of bytes from the end of the file to look for EOF marker
:type check_size: int
:return: True if complete
:rtype: bool
"""
Expand All @@ -38,9 +42,19 @@ def is_gif_complete(img):
if data is None:
return False
if data_len > 1:
data.seek(data_len - 1, 0)
marker = data.read(1)
return marker[0] == 59
if strict:
data.seek(data_len - 1, 0)
marker = data.read(1)
return marker[0] == 59
else:
if check_size > data_len:
check_size = data_len
data.seek(data_len - check_size, 0)
buffer = data.read(check_size)
for b in buffer:
if b == 59:
return True
return False
else:
return False
except:
Expand Down
24 changes: 19 additions & 5 deletions src/image_complete/jpg.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from io import BytesIO
from .base import load
from .base import load, DEFAULT_CHECK_SIZE


def is_jpg(img):
Expand All @@ -22,14 +22,18 @@ def is_jpg(img):
return False


def is_jpg_complete(img):
def is_jpg_complete(img, strict=True, check_size=DEFAULT_CHECK_SIZE):
"""
Checks whether the JPG image is complete.
http://en.wikipedia.org/wiki/JPEG#Syntax_and_structure
:param img: the absolute path to the JPG image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:param strict: if True then no junk data after actual data is allowed
:type strict: bool
:param check_size: the number of bytes from the end of the file to look for EOF marker
:type check_size: int
:return: True if complete
:rtype: bool
"""
Expand All @@ -38,9 +42,19 @@ def is_jpg_complete(img):
if data is None:
return False
if data_len > 2:
data.seek(data_len - 2, 0)
marker = data.read(2)
return (marker[0] == 0xFF) and (marker[1] == 0xD9)
if strict:
data.seek(data_len - 2, 0)
marker = data.read(2)
return (marker[0] == 0xFF) and (marker[1] == 0xD9)
else:
if check_size > data_len:
check_size = data_len
data.seek(data_len - check_size, 0)
buffer = data.read(check_size)
for i in range(len(buffer) - 1):
if (buffer[i] == 0xFF) and (buffer[i+1] == 0xD9):
return True
return False
else:
return False
except:
Expand Down
24 changes: 19 additions & 5 deletions src/image_complete/png.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from io import BytesIO
from .base import load
from .base import load, DEFAULT_CHECK_SIZE


def is_png(img):
Expand All @@ -22,7 +22,7 @@ def is_png(img):
return False


def is_png_complete(img):
def is_png_complete(img, strict=True, check_size=DEFAULT_CHECK_SIZE):
"""
Checks whether the PNG image is complete.
Expand All @@ -32,6 +32,10 @@ def is_png_complete(img):
:param img: the absolute path to the PNG image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:param strict: if True then no junk data after actual data is allowed
:type strict: bool
:param check_size: the number of bytes from the end of the file to look for EOF marker
:type check_size: int
:return: True if complete
:rtype: bool
"""
Expand All @@ -40,9 +44,19 @@ def is_png_complete(img):
if data is None:
return False
if data_len > 8:
data.seek(data_len - 8, 0)
marker = data.read(8)
return (marker[0] == 73) and (marker[1] == 69) and (marker[2] == 78) and (marker[3] == 68)
if strict:
data.seek(data_len - 8, 0)
marker = data.read(8)
return (marker[0] == 73) and (marker[1] == 69) and (marker[2] == 78) and (marker[3] == 68)
else:
if check_size > data_len:
check_size = data_len
data.seek(data_len - check_size, 0)
buffer = data.read(check_size)
for i in range(len(buffer) - 8):
if (buffer[i] == 73) and (buffer[i+1] == 69) and (buffer[i+2] == 78) and (buffer[i+3] == 68):
return True
return False
else:
return False
except:
Expand Down
9 changes: 7 additions & 2 deletions src/image_complete/webp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,16 @@ def is_webp(img):
return False


def is_webp_complete(img):
def is_webp_complete(img, strict=True):
"""
Checks whether the WebP image is complete.
https://developers.google.com/speed/webp/docs/riff_container
:param img: the absolute path to the WebP image or a bytes/BytesIO object
:type img: str or bytes or BytesIO
:param strict: if True then no junk data after actual data is allowed
:type strict: bool
:return: True if complete
:rtype: bool
"""
Expand All @@ -50,7 +52,10 @@ def is_webp_complete(img):
data.seek(4, 0)
data = data.read(4)
d_len = struct.unpack('I', data)
return d_len[0] == data_len - 8 # RIFF/4 + DATALEN/4 = 8
if strict:
return d_len[0] == data_len - 8 # RIFF/4 + DATALEN/4 = 8
else:
return d_len[0] <= data_len - 8 # RIFF/4 + DATALEN/4 = 8
else:
return False
except:
Expand Down
Loading

0 comments on commit 90e2c52

Please sign in to comment.