Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Use nginx mod_zip to generate multi-file zip downloads #1102

Merged
merged 2 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .trunk/trunk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ lint:
paths:
- frontend/src/__generated__/**
- docker/Dockerfile
- docker/nginx/js/**
definitions:
- name: eslint
files: [typescript, javascript]
Expand Down
12 changes: 0 additions & 12 deletions backend/endpoints/responses/rom.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
from endpoints.responses.assets import SaveSchema, ScreenshotSchema, StateSchema
from endpoints.responses.collection import CollectionSchema
from fastapi import Request
from fastapi.responses import StreamingResponse
from handler.metadata.igdb_handler import IGDBMetadata
from handler.metadata.moby_handler import MobyMetadata
from handler.socket_handler import socket_handler
from models.rom import Rom, RomFile
from pydantic import BaseModel, Field, computed_field

Expand Down Expand Up @@ -184,13 +182,3 @@ class UserNotesSchema(TypedDict):
user_id: int
username: str
note_raw_markdown: str


class CustomStreamingResponse(StreamingResponse):
    """Streaming response that emits a socket event once streaming has finished.

    Takes one extra keyword argument, ``emit_body``, which is used as the
    payload of the ``download:complete`` event. All other arguments are
    passed through to ``StreamingResponse`` unchanged.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Remove our own keyword first so it is not forwarded to the parent.
        payload = kwargs.pop("emit_body", None)
        self.emit_body = payload
        super().__init__(*args, **kwargs)

    async def stream_response(self, *args, **kwargs) -> None:
        # Let the parent stream the whole body, then announce completion.
        await super().stream_response(*args, **kwargs)
        server = socket_handler.socket_server
        await server.emit("download:complete", self.emit_body)
83 changes: 28 additions & 55 deletions backend/endpoints/rom.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from collections.abc import AsyncIterator
from datetime import datetime
import binascii
from base64 import b64encode
from shutil import rmtree
from stat import S_IFREG
from typing import Annotated
from urllib.parse import quote

Expand All @@ -13,12 +12,7 @@
)
from decorators.auth import protected_route
from endpoints.responses import MessageResponse
from endpoints.responses.rom import (
CustomStreamingResponse,
DetailedRomSchema,
RomUserSchema,
SimpleRomSchema,
)
from endpoints.responses.rom import DetailedRomSchema, RomUserSchema, SimpleRomSchema
from exceptions.endpoint_exceptions import RomNotFoundInDatabaseException
from exceptions.fs_exceptions import RomAlreadyExistsException
from fastapi import HTTPException, Query, Request, UploadFile, status
Expand All @@ -29,9 +23,10 @@
from handler.metadata import meta_igdb_handler, meta_moby_handler
from logger.logger import log
from starlette.requests import ClientDisconnect
from stream_zip import NO_COMPRESSION_64, ZIP_AUTO, AsyncMemberFile, async_stream_zip
from streaming_form_data import StreamingFormDataParser
from streaming_form_data.targets import FileTarget, NullTarget
from utils.hashing import crc32_to_hex
from utils.nginx import ZipContentLine, ZipResponse
from utils.router import APIRouter

router = APIRouter()
Expand Down Expand Up @@ -221,7 +216,7 @@ async def get_rom_content(
FileResponse: Returns one file for single file roms

Yields:
CustomStreamingResponse: Streams a file for multi-part roms
ZipResponse: Returns a response for nginx to serve a Zip file for multi-part roms
"""

rom = db_rom_handler.get_rom(id)
Expand All @@ -230,7 +225,7 @@ async def get_rom_content(
raise RomNotFoundInDatabaseException(id)

rom_path = f"{LIBRARY_BASE_PATH}/{rom.full_path}"
files_to_download = files or [r["filename"] for r in rom.files]
files_to_download = sorted(files or [r["filename"] for r in rom.files])

gantoine marked this conversation as resolved.
Show resolved Hide resolved
if not rom.multi:
return Response(
Expand All @@ -250,51 +245,29 @@ async def get_rom_content(
},
)

# Builds a generator of tuples for each member file
async def local_files() -> AsyncIterator[AsyncMemberFile]:
async def contents(filename: str) -> AsyncIterator[bytes]:
try:
async with await open_file(f"{rom_path}/{filename}", "rb") as f:
while chunk := await f.read(65536):
yield chunk
except FileNotFoundError:
log.error(f"File {rom_path}/{filename} not found!")
raise

async def m3u_file() -> AsyncIterator[bytes]:
for file in files_to_download:
yield str.encode(f"{file}\n")

now = datetime.now()

for f in files_to_download:
file_size = (await Path(f"{rom_path}/{f}").stat()).st_size
yield (
f,
now,
S_IFREG | 0o600,
ZIP_AUTO(file_size, level=0),
contents(f),
)

yield (
f"{file_name}.m3u",
now,
S_IFREG | 0o600,
NO_COMPRESSION_64,
m3u_file(),
content_lines = [
ZipContentLine(
# TODO: Use calculated CRC-32 if available.
crc32=None,
size_bytes=(await Path(f"{rom_path}/{f}").stat()).st_size,
gantoine marked this conversation as resolved.
Show resolved Hide resolved
encoded_location=quote(f"/library-zip/{rom.full_path}/{f}"),
filename=f,
)
for f in files_to_download
]

m3u_encoded_content = "\n".join([f for f in files_to_download]).encode()
m3u_base64_content = b64encode(m3u_encoded_content).decode()
m3u_line = ZipContentLine(
crc32=crc32_to_hex(binascii.crc32(m3u_encoded_content)),
size_bytes=len(m3u_encoded_content),
encoded_location=f"/decode?value={m3u_base64_content}",
filename=f"{file_name}.m3u",
)

gantoine marked this conversation as resolved.
Show resolved Hide resolved
zipped_chunks = async_stream_zip(local_files())

# Streams the zip file to the client
return CustomStreamingResponse(
zipped_chunks,
media_type="application/zip",
headers={
"Content-Disposition": f'attachment; filename="{quote(file_name)}.zip"',
},
emit_body={"id": rom.id},
return ZipResponse(
content_lines=content_lines + [m3u_line],
filename=f"{quote(file_name)}.zip",
)


Expand Down
3 changes: 2 additions & 1 deletion backend/handler/filesystem/firmware_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from fastapi import UploadFile
from logger.logger import log
from utils.filesystem import iter_files
from utils.hashing import crc32_to_hex

from .base_handler import FSHandler

Expand Down Expand Up @@ -61,7 +62,7 @@ def calculate_file_hashes(self, firmware_path: str, file_name: str):
crc_c = binascii.crc32(chunk, crc_c)

return {
"crc_hash": (crc_c & 0xFFFFFFFF).to_bytes(4, byteorder="big").hex(),
"crc_hash": crc32_to_hex(crc_c),
"md5_hash": md5_h.hexdigest(),
"sha1_hash": sha1_h.hexdigest(),
}
Expand Down
3 changes: 2 additions & 1 deletion backend/handler/filesystem/roms_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from exceptions.fs_exceptions import RomAlreadyExistsException, RomsNotFoundException
from models.rom import RomFile
from utils.filesystem import iter_directories, iter_files
from utils.hashing import crc32_to_hex

from .base_handler import (
LANGUAGES_BY_SHORTCODE,
Expand Down Expand Up @@ -271,7 +272,7 @@ def get_rom_hashes(self, rom: str, roms_path: str) -> dict[str, str]:
)

return {
"crc_hash": (crc_c & 0xFFFFFFFF).to_bytes(4, byteorder="big").hex(),
"crc_hash": crc32_to_hex(crc_c),
"md5_hash": md5_h.hexdigest(),
"sha1_hash": sha1_h.hexdigest(),
}
Expand Down
2 changes: 2 additions & 0 deletions backend/utils/hashing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def crc32_to_hex(value: int) -> str:
    """Return the low 32 bits of *value* as eight lowercase hexadecimal digits.

    The value is masked to 32 bits first, so negative inputs and inputs wider
    than 32 bits are folded into the unsigned 32-bit range. The digits are in
    big-endian (most significant byte first) order and zero-padded.
    """
    masked = value & 0xFFFFFFFF
    return f"{masked:08x}"
gantoine marked this conversation as resolved.
Show resolved Hide resolved
49 changes: 49 additions & 0 deletions backend/utils/nginx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import dataclasses
from collections.abc import Collection
from typing import Any

from fastapi.responses import Response


@dataclasses.dataclass(frozen=True)
class ZipContentLine:
"""Dataclass for lines returned in the response body, for usage with the `mod_zip` module.

Reference:
https://github.com/evanmiller/mod_zip?tab=readme-ov-file#usage
"""

crc32: str | None
size_bytes: int
encoded_location: str
filename: str

def __str__(self) -> str:
crc32 = self.crc32 or "-"
return f"{crc32} {self.size_bytes} {self.encoded_location} {self.filename}"


class ZipResponse(Response):
    """Response class for returning a ZIP archive with multiple files, using the `mod_zip` module.

    The response body is the newline-separated file list built from
    `content_lines`. The `X-Archive-Files: zip` header marks the response for
    `mod_zip`, and `Content-Disposition` carries the download file name.
    """

    def __init__(
        self,
        *,
        content_lines: Collection[ZipContentLine],
        filename: str,
        **kwargs: Any,
    ):
        """Build the file-list body and the headers.

        Args:
            content_lines: Entries to include in the archive, in order.
            filename: Archive name placed verbatim inside the quoted
                `Content-Disposition` filename; the caller must encode it.
            **kwargs: Forwarded to `Response`. `headers`, when given, is
                merged with the generated headers (generated ones win).

        Raises:
            ValueError: If a non-empty `content` argument is supplied.
        """
        if kwargs.get("content"):
            raise ValueError(
                "Argument 'content' must not be provided, as it is generated from 'content_lines'"
            )

        kwargs["content"] = "\n".join(str(line) for line in content_lines)

        # Build a fresh dict instead of updating the caller's mapping in place:
        # this leaves the caller's object untouched, and also accepts
        # `headers=None` and read-only mappings.
        kwargs["headers"] = {
            **(kwargs.get("headers") or {}),
            "Content-Disposition": f'attachment; filename="{filename}"',
            "X-Archive-Files": "zip",
        }

        super().__init__(**kwargs)
35 changes: 34 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,42 @@ WORKDIR /src
COPY ./pyproject.toml ./poetry.lock /src/
RUN poetry install --no-ansi --no-cache --only main

# Build nginx modules
FROM alpine:${ALPINE_VERSION} AS nginx-build

RUN apk add --no-cache \
gcc \
git \
libc-dev \
make \
pcre-dev \
zlib-dev

ARG NGINX_VERSION
# The specified commit SHA is the latest commit on the `master` branch at the time of writing.
# It includes a fix to correctly calculate CRC-32 checksums when using upstream subrequests.
# TODO: Move to a tagged release of `mod_zip`, once a version newer than 1.3.0 is released.
ARG NGINX_MOD_ZIP_SHA=8e65b82c82c7890f67a6107271c127e9881b6313

# Clone both nginx and `ngx_http_zip_module` repositories, needed to compile the module from source.
# This is needed to be able to dynamically load it as a module in the final image. `nginx` Docker
# images do not have a simple way to include third-party modules.
RUN git clone https://github.com/evanmiller/mod_zip.git && \
cd ./mod_zip && \
git checkout "${NGINX_MOD_ZIP_SHA}" && \
cd ../ && \
git clone --branch "release-${NGINX_VERSION}" --depth 1 https://github.com/nginx/nginx.git && \
cd ./nginx && \
./auto/configure --with-compat --add-dynamic-module=../mod_zip/ && \
make -f ./objs/Makefile modules && \
chmod 644 ./objs/ngx_http_zip_module.so
gantoine marked this conversation as resolved.
Show resolved Hide resolved

# Setup frontend and backend
FROM nginx:${NGINX_VERSION}-alpine${ALPINE_VERSION}-slim AS production-stage
FROM nginx:${NGINX_VERSION}-alpine${ALPINE_VERSION} AS production-stage
ARG WEBSERVER_FOLDER=/var/www/html

COPY --from=nginx-build ./nginx/objs/ngx_http_zip_module.so /usr/lib/nginx/modules/

COPY --from=front-build-stage /front/dist ${WEBSERVER_FOLDER}
COPY ./frontend/assets/default ${WEBSERVER_FOLDER}/assets/default
COPY ./frontend/assets/emulatorjs ${WEBSERVER_FOLDER}/assets/emulatorjs
Expand All @@ -62,6 +94,7 @@ COPY ./backend /backend

# Setup init script and config files
COPY ./docker/init_scripts/* /
COPY ./docker/nginx/js/ /etc/nginx/js/
COPY ./docker/nginx/default.conf /etc/nginx/nginx.conf

# User permissions
Expand Down
35 changes: 35 additions & 0 deletions docker/nginx/default.conf
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
load_module modules/ngx_http_js_module.so;
load_module modules/ngx_http_zip_module.so;

worker_processes auto;
pid /tmp/nginx.pid;

Expand Down Expand Up @@ -29,6 +32,8 @@ http {
ssl_protocols TLSv1 TLSv1.1 TLSv1.2 TLSv1.3; # Dropping SSLv3, ref: POODLE
ssl_prefer_server_ciphers on;

js_import /etc/nginx/js/decode.js;

map $time_iso8601 $date {
~([^+]+)T $1;
}
Expand Down Expand Up @@ -105,5 +110,35 @@ http {
internal;
alias /romm/library;
}

# This location, and the related server at port 8081, are used to serve files when
# using the `mod_zip` module. This is because the `mod_zip` module does not support
# calculating CRC-32 values when using subrequests pointing directly to internal
# locations that access the filesystem.
# TODO: If that gets fixed, this workaround can be removed, and the `/library` location
# can be used directly (also removing the server at port 8081).
# Related issue: https://github.com/evanmiller/mod_zip/issues/90
gantoine marked this conversation as resolved.
Show resolved Hide resolved
location /library-zip {
internal;
rewrite ^/library-zip/(.*)$ /library/$1 break;
proxy_pass http://localhost:8081;
# Proxy buffering must be disabled, for the module to correctly calculate CRC-32 values.
proxy_buffering off;
}

# Internal decoding endpoint, used to decode base64 encoded data
location /decode {
internal;
js_content decode.decodeBase64;
}
}

server {
    # Internal-only file server: it exists solely as the upstream that the
    # `/library-zip` location proxies to (`proxy_pass http://localhost:8081`).
    # Bind it to the loopback interface so the library cannot be fetched on
    # this port from other hosts or containers, which would bypass the
    # `internal` restriction placed on `/library` in the main server.
    # `server_name` alone does not restrict who can connect.
    listen 127.0.0.1:8081;
    server_name localhost;

    location /library {
        alias /romm/library;
    }
}
}
19 changes: 19 additions & 0 deletions docker/nginx/js/decode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// njs content handler: reads the Base64 text from the 'value' query parameter
// and responds with the decoded data as the body.
//   - 400 when the parameter is missing or empty
//   - 400 when decoding (or sending the decoded body) throws
//   - 200 with the decoded value otherwise
function decodeBase64(r) {
  var payload = r.args.value;

  if (payload) {
    try {
      r.return(200, atob(payload));
    } catch (err) {
      r.return(400, "Invalid Base64 encoding");
    }
    return;
  }

  r.return(400, "Missing 'value' query parameter");
}

export default { decodeBase64 };
Loading
Loading