Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate jupyter-archive #252

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion jupyter_server/files/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ async def get(self, path, include_body=True):
# The delay in ms at which we send the chunk of data
# to the client.
ARCHIVE_DOWNLOAD_FLUSH_DELAY = 100

# Supported archive formats
SUPPORTED_FORMAT = [
"zip",
"tgz",
Expand All @@ -99,6 +101,10 @@ async def get(self, path, include_body=True):
]


# Fallback maximum size (in bytes) of a directory that may be archived,
# used when the request's `folderSizeLimit` argument is missing or invalid.
DEFAULT_DIRECTORY_SIZE_LIMIT = 1073741824  # 1GB (1024 ** 3 bytes)
# Archive format used when the request omits the `archiveFormat` argument.
DEFAULT_ARCHIVE_FORMAT = "zip"


class ArchiveStream:
hadim marked this conversation as resolved.
Show resolved Hide resolved
"""ArchiveStream is an abstraction layer to a Python archive allowing
to stream archive files.
Expand Down Expand Up @@ -145,6 +151,13 @@ def make_writer(handler, archive_format="zip"):
return archive_file


def get_folder_size(dir_path):
    """Return the total size in bytes of the directory at *dir_path*.

    Only regular files are counted; subdirectory entries themselves
    contribute nothing, but files nested at any depth are included.
    """
    total_bytes = 0
    for entry in pathlib.Path(dir_path).glob("**/*"):
        if entry.is_file():
            total_bytes += entry.stat().st_size
    return total_bytes


def make_reader(archive_path):
"""Return the appropriate archive file instance given
the extension's path of `archive_path`.
Expand Down Expand Up @@ -183,15 +196,40 @@ async def get(self, archive_path, include_body=False):
raise web.HTTPError(404)

archive_token = self.get_argument("archiveToken")
archive_format = self.get_argument("archiveFormat", "zip")
archive_format = self.get_argument("archiveFormat", DEFAULT_ARCHIVE_FORMAT)
folder_size_limit = self.get_argument("folderSizeLimit", None)

# Check whether the specified archive format is supported.
if archive_format not in SUPPORTED_FORMAT:
self.log.error("Unsupported format {}.".format(archive_format))
raise web.HTTPError(404)

# If the folder size limit is not specified in the request, a
# default size limit is used.
try:
folder_size_limit_num = int(folder_size_limit)

except (ValueError, TypeError):
self.log.warning(
"folderSizeLimit is a not a valid number: {}.".format(folder_size_limit)
)
folder_size_limit_num = DEFAULT_DIRECTORY_SIZE_LIMIT

root_dir = pathlib.Path(cm.root_dir)
archive_path = root_dir / url2path(archive_path)
archive_filename = archive_path.with_suffix(".{}".format(archive_format)).name

# Check whether the archive folder is not larger than the size limit.
folder_size = get_folder_size(archive_path)
print(folder_size)
hadim marked this conversation as resolved.
Show resolved Hide resolved
if folder_size > folder_size_limit_num:
self.log.error(
"Archive folder size is larger than the size limit: {} bytes with a size limit of {}.".format(
folder_size, folder_size_limit_num
)
)
raise web.HTTPError(413)

self.log.info(
"Prepare {} for archiving and downloading.".format(archive_filename)
)
Expand Down
49 changes: 47 additions & 2 deletions tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ async def test_old_files_redirect(fetch, serverapp, root_dir):
# self.assertIn("filename*=utf-8''test.txt", disposition)


async def test_download_directory(fetch, serverapp, root_dir):
def make_fake_folder(root_dir):
# Create a dummy directory.
root_dir = pathlib.Path(str(root_dir))
archive_dir_path = root_dir / "download-archive-dir"
Expand All @@ -159,6 +159,14 @@ async def test_download_directory(fetch, serverapp, root_dir):
"download-archive-dir/test3.md",
}

dir_path = str(archive_dir_path.relative_to(root_dir))

return dir_path, file_lists


async def test_download_directory(fetch, serverapp, root_dir):
dir_path, file_lists = make_fake_folder(root_dir)

archive_formats = {
"zip": "r",
"tgz": "r|gz",
Expand All @@ -175,8 +183,8 @@ async def test_download_directory(fetch, serverapp, root_dir):

for archive_format, mode in archive_formats.items():
params = dict(archiveToken=archive_token, archiveFormat=archive_format)
dir_path = str(archive_dir_path.relative_to(root_dir))
r = await fetch("directories", dir_path, method="GET", params=params)

assert r.code == 200
assert r.headers.get("content-type") == "application/octet-stream"
assert r.headers.get("cache-control") == "no-cache"
Expand All @@ -188,6 +196,43 @@ async def test_download_directory(fetch, serverapp, root_dir):
assert set(map(lambda m: m.name, tf.getmembers())) == file_lists


async def test_download_directory_size_limit(fetch, serverapp, root_dir):
    """The server must reject (HTTP 413) an archive request for a
    directory larger than the requested ``folderSizeLimit``.
    """
    dir_path, _ = make_fake_folder(root_dir)

    # The fake folder created above totals 18 bytes, so a 10-byte
    # limit must trigger the "payload too large" error.
    params = dict(
        archiveToken=59487596,
        archiveFormat="zip",
        folderSizeLimit=10,
    )

    with pytest.raises(tornado.httpclient.HTTPClientError) as e:
        await fetch("directories", dir_path, method="GET", params=params)
    assert expected_http_error(e, 413)

async def test_download_wrong_archive_format(fetch, serverapp, root_dir):
    """The server must answer 404 when the requested ``archiveFormat``
    is not one of the supported archive formats.
    """
    dir_path, _ = make_fake_folder(root_dir)

    params = dict(
        archiveToken=59487596,
        archiveFormat="a_format_that_does_not_exist",
    )

    with pytest.raises(tornado.httpclient.HTTPClientError) as e:
        await fetch("directories", dir_path, method="GET", params=params)
    assert expected_http_error(e, 404)

async def test_extract_directory(fetch, serverapp, root_dir):

format_mode = {
Expand Down