Skip to content

Commit

Permalink
backend: create endpoint for downloading data created by cron
Browse files Browse the repository at this point in the history
This solution raises two concerns for the future once we have a
lot of data collected:
- it will be a while until the data is tarred - creating a delay before the
  actual download (after circa 200 feedbacks, a really noticeable delay :/)
  look at example with only 300 (tiny!!) feedbacks:
  [root@backend persistent]# time tar -zcf results.tar.gz results/
real 0m9.473s
user 0m9.328s
sys  0m0.483s
- downloading takes also some time

-> thus blocking the whole worker during this. IIRC we have 8 workers thus
8 downloads and API is unresponsive.

Solution:
- how to solve the delay before download:
  fedora-copr#64
- the issue above is the slowest, once that will be resolved and people
  still complain, do this:
  fedora-copr#65
  • Loading branch information
nikromen committed Jan 7, 2024
1 parent 46a8cc7 commit 6f8f50c
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
40 changes: 38 additions & 2 deletions backend/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
import logging
import os
from base64 import b64decode
from datetime import datetime
from http import HTTPStatus
from pathlib import Path
from typing import Iterator

from fastapi import FastAPI, Request
from fastapi import FastAPI, Request, Depends
from fastapi.exceptions import RequestValidationError
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
Expand All @@ -33,6 +36,7 @@
FeedbackSchema,
schema_inp_to_out,
)
from backend.spells import make_tar, get_temporary_dir
from backend.store import Storator3000

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -263,3 +267,35 @@ def frontend_review_latest() -> FeedbackSchema:
with open(feedback_file) as file:
content = json.loads(file.read())
return FeedbackSchema(**content)


def _make_tpm_tar_file_from_results() -> Iterator[Path]:
    """FastAPI dependency: tar up the collected feedback results.

    Creates a gzipped tarball of the directory named by the FEEDBACK_DIR
    environment variable inside a temporary directory, yields its path for
    the duration of the request, and removes the file afterwards (the
    temporary directory itself is cleaned up by ``get_temporary_dir``).

    Yields:
        Path of the freshly created ``results-<timestamp>.tar.gz`` file.

    Raises:
        HTTPException: 404 when FEEDBACK_DIR is unset or does not point
            to an existing directory.
    """
    # NOTE(review): "tpm" looks like a typo for "tmp"; kept as-is so the
    # Depends() reference in download_results stays valid.
    results = os.environ.get("FEEDBACK_DIR")
    if results is None or not Path(results).is_dir():
        # Without the is_dir() check a missing directory would surface as
        # an unhandled error from tarfile (HTTP 500) instead of a clean 404.
        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="No data found")

    with get_temporary_dir() as tmp_dir:
        tar_path = make_tar(
            f"results-{int(datetime.now().timestamp())}.tar.gz", Path(results), tmp_dir
        )
        try:
            yield tar_path
        finally:
            # Remove the tarball eagerly; the surrounding temporary
            # directory is removed by the context manager anyway.
            os.unlink(tar_path)


@app.get("/download", response_class=StreamingResponse)
def download_results(_tar_path=Depends(_make_tpm_tar_file_from_results)):
    """Stream the tarred feedback results to the client as a download."""

    def stream_file(path: Path):
        # Yield the tarball lazily so the whole archive never has to be
        # loaded into memory at once.
        with open(path, mode="rb") as fh:
            yield from fh

    headers = {
        # TODO: https://github.com/fedora-copr/log-detective-website/issues/63
        "Content-Disposition": f"attachment; filename={_tar_path.name}",  # noqa: E702
        "Content-Length": str(_tar_path.stat().st_size),
    }
    return StreamingResponse(
        stream_file(_tar_path),
        media_type="application/x-tar",
        headers=headers,
    )
20 changes: 20 additions & 0 deletions backend/spells.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import shutil
import tarfile
import tempfile
from contextlib import contextmanager
from pathlib import Path
Expand All @@ -16,3 +17,22 @@ def get_temporary_dir() -> Iterator[Path]:
yield temp_dir
finally:
shutil.rmtree(temp_dir)


def make_tar(name: str, source: Path, destination: Path) -> Path:
    """
    Make a gzipped tar archive from the source path.

    Args:
        name: File name of the resulting tarball (e.g. ``results.tar.gz``)
        source: File or directory to be archived
        destination: Existing folder in which to create the tarball

    Returns:
        Path where to find the created tar file.
    """
    tar_path = destination / name
    with tarfile.open(tar_path, "w:gz") as tar_f:
        # Store the source under its own name inside the archive. Using
        # ``name`` here (the previous behavior) made the top-level member
        # the tarball's file name, so extraction produced a directory
        # literally called "results-<ts>.tar.gz".
        tar_f.add(source, arcname=source.name)

    return tar_path

0 comments on commit 6f8f50c

Please sign in to comment.