Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip compressed files if they're invalid #1097

Merged
merged 1 commit into from
Aug 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions backend/endpoints/sockets/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,14 @@ async def scan_handler(_sid: str, options: dict):
roms_ids = options.get("roms_ids", [])
metadata_sources = options.get("apis", [])

# Uncomment this to run scan in the current process
# await scan_platforms(
# platform_ids=platform_ids,
# scan_type=scan_type,
# roms_ids=roms_ids,
# metadata_sources=metadata_sources,
# )

return high_prio_queue.enqueue(
scan_platforms,
platform_ids,
Expand Down
66 changes: 43 additions & 23 deletions backend/handler/filesystem/roms_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,40 +63,62 @@ def is_compressed_file(file_path: str) -> bool:
)


def read_basic_file(file_path: Path) -> Iterator[bytes]:
    """Stream the raw bytes of *file_path* in fixed-size chunks.

    Yields successive chunks of at most FILE_READ_CHUNK_SIZE bytes so large
    files are never loaded into memory at once. Yields nothing for an empty
    file.
    """
    with file_path.open("rb") as stream:
        # iter(callable, sentinel): read() returns b"" at EOF, ending the loop.
        for piece in iter(lambda: stream.read(FILE_READ_CHUNK_SIZE), b""):
            yield piece


def read_zip_file(file_path: Path) -> Iterator[bytes]:
    """Yield the decompressed bytes of every entry in a zip archive.

    Entries are streamed in archive order, each in chunks of at most
    FILE_READ_CHUNK_SIZE bytes.

    If the file is not a valid zip (zipfile.BadZipFile), fall back to
    streaming its raw bytes via read_basic_file so an invalid or mislabeled
    archive is still processed instead of aborting the scan.
    """
    try:
        with zipfile.ZipFile(file_path, "r") as z:
            for file in z.namelist():
                with z.open(file, "r") as f:
                    while chunk := f.read(FILE_READ_CHUNK_SIZE):
                        yield chunk
    except zipfile.BadZipFile:
        yield from read_basic_file(file_path)


def read_tar_file(file_path: Path, mode: str = "r") -> Iterator[bytes]:
    """Yield the decompressed bytes of every regular file in a tar archive.

    Args:
        file_path: Path to the tar archive.
        mode: tarfile open mode (e.g. "r", "r:gz") — lets gz wrappers reuse
            this function.

    If the file cannot be opened as a tar archive (tarfile.ReadError), fall
    back to streaming its raw bytes via read_basic_file so invalid archives
    are still processed instead of aborting the scan.
    """
    try:
        with tarfile.open(file_path, mode) as f:
            for member in f.getmembers():
                # Ignore metadata files created by macOS
                if member.name.startswith("._"):
                    continue

                # extractfile() returns None for directories and special
                # members; skip them instead of crashing on `with None`.
                if not member.isfile():
                    continue
                ef = f.extractfile(member)
                if ef is None:
                    continue
                with ef:
                    while chunk := ef.read(FILE_READ_CHUNK_SIZE):
                        yield chunk
    except tarfile.ReadError:
        yield from read_basic_file(file_path)


def read_gz_file(file_path: Path) -> Iterator[bytes]:
    """Stream a gzip-compressed tar archive by delegating to read_tar_file."""
    yield from read_tar_file(file_path, "r:gz")


def read_7z_file(file_path: Path) -> Iterator[bytes]:
    """Yield the decompressed bytes of every entry in a 7z archive.

    NOTE(review): py7zr's readall() materializes all entries in memory before
    we stream them in chunks — acceptable for typical ROMs, but large archives
    pay the full decompression cost up front.

    If the file is not a valid 7z archive (py7zr.Bad7zFile), fall back to
    streaming its raw bytes via read_basic_file so invalid archives are still
    processed instead of aborting the scan.
    """
    try:
        with py7zr.SevenZipFile(file_path, "r") as f:
            for _name, bio in f.readall().items():
                while chunk := bio.read(FILE_READ_CHUNK_SIZE):
                    yield chunk
    except py7zr.Bad7zFile:
        yield from read_basic_file(file_path)


def read_bz2_file(file_path: Path) -> Iterator[bytes]:
    """Yield the decompressed bytes of a bz2-compressed file in chunks.

    Invalid input falls back to streaming the raw bytes via read_basic_file:
    - EOFError: stream truncated before the end-of-stream marker.
    - OSError: data is not a valid bz2 stream at all (BZ2File raises
      "Invalid data stream" as OSError). A genuine I/O error would also land
      here, but the fallback read of the same file would then fail and
      propagate anyway.
    """
    try:
        with bz2.BZ2File(file_path, "rb") as f:
            while chunk := f.read(FILE_READ_CHUNK_SIZE):
                yield chunk
    except (EOFError, OSError):
        yield from read_basic_file(file_path)


Expand Down Expand Up @@ -221,10 +243,8 @@ def update_hashes(chunk: bytes):
update_hashes(chunk)

else:
with open(file_path, "rb") as f:
# Read in chunks to avoid memory issues
while chunk := f.read(FILE_READ_CHUNK_SIZE):
update_hashes(chunk)
for chunk in read_basic_file(file_path):
update_hashes(chunk)

return crc_c, md5_h, sha1_h

Expand Down
Loading