Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate jupyter-archive #252

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion jupyter_server/files/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ async def get(self, path, include_body=True):
# The delay in ms at which we send the chunk of data
# to the client.
ARCHIVE_DOWNLOAD_FLUSH_DELAY = 100

# Supported archive formats
SUPPORTED_FORMAT = [
"zip",
"tgz",
Expand All @@ -99,6 +101,10 @@ async def get(self, path, include_body=True):
]


# Fallback maximum size (in bytes) of a directory that may be archived,
# used when the request's `folderSizeLimit` argument is missing or invalid.
DEFAULT_DIRECTORY_SIZE_LIMIT = 1073741824  # 1GB (1024 ** 3 bytes)
# Archive format used when the request omits the `archiveFormat` argument.
DEFAULT_ARCHIVE_FORMAT = "zip"


class ArchiveStream:
hadim marked this conversation as resolved.
Show resolved Hide resolved
"""ArchiveStream is an abstraction layer to a Python archive allowing
to stream archive files.
Expand Down Expand Up @@ -145,6 +151,13 @@ def make_writer(handler, archive_format="zip"):
return archive_file


def get_folder_size(dir_path):
    """Return the total size in bytes of the directory at *dir_path*.

    Only regular files are counted; subdirectory entries themselves
    contribute nothing, but files nested at any depth are included.
    """
    total_bytes = 0
    for entry in pathlib.Path(dir_path).glob("**/*"):
        if entry.is_file():
            total_bytes += entry.stat().st_size
    return total_bytes


def make_reader(archive_path):
"""Return the appropriate archive file instance given
the extension's path of `archive_path`.
Expand Down Expand Up @@ -183,15 +196,40 @@ async def get(self, archive_path, include_body=False):
raise web.HTTPError(404)

archive_token = self.get_argument("archiveToken")
archive_format = self.get_argument("archiveFormat", "zip")
archive_format = self.get_argument("archiveFormat", DEFAULT_ARCHIVE_FORMAT)
folder_size_limit = self.get_argument("folderSizeLimit", None)

# Check whether the specified archive format is supported.
if archive_format not in SUPPORTED_FORMAT:
self.log.error("Unsupported format {}.".format(archive_format))
raise web.HTTPError(404)

# If the folder size limit is not specified in the request, a
# default size limit is used.
try:
folder_size_limit_num = int(folder_size_limit)

except (ValueError, TypeError):
self.log.warning(
"folderSizeLimit is a not a valid number: {}.".format(folder_size_limit)
)
folder_size_limit_num = DEFAULT_DIRECTORY_SIZE_LIMIT

root_dir = pathlib.Path(cm.root_dir)
archive_path = root_dir / url2path(archive_path)
archive_filename = archive_path.with_suffix(".{}".format(archive_format)).name

# Check whether the archive folder is not larger than the size limit.
folder_size = get_folder_size(archive_path)
print(folder_size)
hadim marked this conversation as resolved.
Show resolved Hide resolved
if folder_size > folder_size_limit_num:
self.log.error(
"Archive folder size is larger than the size limit: {} bytes with a size limit of {}.".format(
folder_size, folder_size_limit_num
)
)
raise web.HTTPError(413)

self.log.info(
"Prepare {} for archiving and downloading.".format(archive_filename)
)
Expand Down
49 changes: 47 additions & 2 deletions tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ async def test_old_files_redirect(fetch, serverapp, root_dir):
# self.assertIn("filename*=utf-8''test.txt", disposition)


async def test_download_directory(fetch, serverapp, root_dir):
def make_fake_folder(root_dir):
# Create a dummy directory.
root_dir = pathlib.Path(str(root_dir))
archive_dir_path = root_dir / "download-archive-dir"
Expand All @@ -159,6 +159,14 @@ async def test_download_directory(fetch, serverapp, root_dir):
"download-archive-dir/test3.md",
}

dir_path = str(archive_dir_path.relative_to(root_dir))

return dir_path, file_lists


async def test_download_directory(fetch, serverapp, root_dir):
dir_path, file_lists = make_fake_folder(root_dir)

archive_formats = {
"zip": "r",
"tgz": "r|gz",
Expand All @@ -175,8 +183,8 @@ async def test_download_directory(fetch, serverapp, root_dir):

for archive_format, mode in archive_formats.items():
params = dict(archiveToken=archive_token, archiveFormat=archive_format)
dir_path = str(archive_dir_path.relative_to(root_dir))
r = await fetch("directories", dir_path, method="GET", params=params)

assert r.code == 200
assert r.headers.get("content-type") == "application/octet-stream"
assert r.headers.get("cache-control") == "no-cache"
Expand All @@ -188,6 +196,43 @@ async def test_download_directory(fetch, serverapp, root_dir):
assert set(map(lambda m: m.name, tf.getmembers())) == file_lists


async def test_download_directory_size_limit(fetch, serverapp, root_dir):
    """The server must reject (HTTP 413) an archive request for a
    directory larger than the requested ``folderSizeLimit``.
    """
    dir_path, _ = make_fake_folder(root_dir)

    # The fake folder created above totals 18 bytes, so a 10-byte
    # limit must trigger the "payload too large" error.
    params = dict(
        archiveToken=59487596,
        archiveFormat="zip",
        folderSizeLimit=10,
    )

    with pytest.raises(tornado.httpclient.HTTPClientError) as e:
        await fetch("directories", dir_path, method="GET", params=params)
    assert expected_http_error(e, 413)

async def test_download_wrong_archive_format(fetch, serverapp, root_dir):
    """The server must answer 404 when the requested ``archiveFormat``
    is not one of the supported archive formats.
    """
    dir_path, _ = make_fake_folder(root_dir)

    params = dict(
        archiveToken=59487596,
        archiveFormat="a_format_that_does_not_exist",
    )

    with pytest.raises(tornado.httpclient.HTTPClientError) as e:
        await fetch("directories", dir_path, method="GET", params=params)
    assert expected_http_error(e, 404)

async def test_extract_directory(fetch, serverapp, root_dir):

format_mode = {
Expand Down