From 6da97f5986e7af0ad10c4702522d7f95b5a0a31a Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Tue, 30 May 2023 14:52:55 -0700 Subject: [PATCH 1/9] add tests --- gradio/routes.py | 6 +++--- gradio/utils.py | 12 ++++++++---- test/test_utils.py | 12 ++++++++++++ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/gradio/routes.py b/gradio/routes.py index 753c55c7c634b..83aae08287de7 100644 --- a/gradio/routes.py +++ b/gradio/routes.py @@ -329,15 +329,15 @@ async def file(path_or_url: str, request: fastapi.Request): if in_blocklist: raise HTTPException(403, f"File not allowed: {path_or_url}.") - in_app_dir = utils.abspath(app.cwd) in abs_path.parents + in_app_dir = utils.is_in_or_equal(abs_path, app.cwd) created_by_app = str(abs_path) in set().union(*blocks.temp_file_sets) - in_file_dir = any( + in_allowlist = any( utils.is_in_or_equal(abs_path, allowed_path) for allowed_path in blocks.allowed_paths ) was_uploaded = utils.abspath(app.uploaded_file_dir) in abs_path.parents - if in_app_dir or created_by_app or in_file_dir or was_uploaded: + if in_app_dir or created_by_app or in_allowlist or was_uploaded: if not abs_path.exists(): raise HTTPException(404, "File not found") if abs_path.is_dir(): diff --git a/gradio/utils.py b/gradio/utils.py index 90d8657713b1f..6994d8bb4210e 100644 --- a/gradio/utils.py +++ b/gradio/utils.py @@ -844,12 +844,16 @@ def is_in_or_equal(path_1: str | Path, path_2: str | Path): True if path_1 is a descendant (i.e. located within) path_2 or if the paths are the same, returns False otherwise. Parameters: - path_1: str or Path (can be a file or directory) + path_1: str or Path (must be a file) path_2: str or Path (can be a file or directory) """ - return (abspath(path_2) in abspath(path_1).parents) or abspath(path_1) == abspath( - path_2 - ) + path_1, path_2 = abspath(path_1), abspath(path_2) + try: + if str(path_1.relative_to(path_2)).startswith(".."): # prevent path traversal + return False + except ValueError: + return False + return True def get_serializer_name(block: Block) -> str | None: diff --git a/test/test_utils.py b/test/test_utils.py index 911b851aef308..4a22954361368 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -39,6 +39,7 @@ sanitize_value_for_csv, tex2svg, validate_url, + is_in_or_equal, ) os.environ["GRADIO_ANALYTICS_ENABLED"] = "False" @@ -623,3 +624,14 @@ def test_tex2svg_preserves_matplotlib_backend(): ): tex2svg("$$$1+1=2$$$") assert matplotlib.get_backend() == "svg" + + +def test_is_in_or_equal(): + assert is_in_or_equal("files/lion.jpg", "files/lion.jpg") + assert is_in_or_equal("files/lion.jpg", "files") + assert not is_in_or_equal("files", "files/lion.jpg") + assert is_in_or_equal(r"C:\files\lion.jpg", r"C:\files") + assert not is_in_or_equal(r"C:\files\..\..\C:\lion.jpg", r"C:\files") + assert is_in_or_equal("/home/usr/notes.txt", "/home/usr/") + assert is_in_or_equal("/home/usr/../usr/notes.txt", "/home/usr/") + assert not is_in_or_equal("/home/usr/../../etc/notes.txt", "/home/usr/") From f7097094a6bbe50d7b071fb9361b884d437f6ab9 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Tue, 30 May 2023 14:54:20 -0700 Subject: [PATCH 2/9] add tests --- gradio/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradio/utils.py b/gradio/utils.py index 6994d8bb4210e..9af39fbc1c6cf 100644 --- a/gradio/utils.py +++ b/gradio/utils.py @@ -844,7 +844,7 @@ def is_in_or_equal(path_1: str | Path, path_2: str | Path): True if path_1 is a descendant (i.e. located within) path_2 or if the paths are the same, returns False otherwise. Parameters: - path_1: str or Path (must be a file) + path_1: str or Path (should be a file) path_2: str or Path (can be a file or directory) """ path_1, path_2 = abspath(path_1), abspath(path_2) From 3eb660e81b84b95ede26dedb7861754fabdba2b2 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Tue, 30 May 2023 14:55:16 -0700 Subject: [PATCH 3/9] file route fix --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff71c8e3015d0..bec37266a8bcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - Make `Blocks.load` behave like other event listeners (allows chaining `then` off of it) [@anentropic](https://github.com/anentropic/) in [PR 4304](https://github.com/gradio-app/gradio/pull/4304) - Respect `interactive=True` in output components of a `gr.Interface` by [@abidlabs](https://github.com/abidlabs) in [PR 4356](https://github.com/gradio-app/gradio/pull/4356). - Remove unused frontend code by [@akx](https://github.com/akx) in [PR 4275](https://github.com/gradio-app/gradio/pull/4275) +- Prevent path traversal in `/file` routes by [@abidlabs](https://github.com/abidlabs) in [PR 4370](https://github.com/gradio-app/gradio/pull/4370). ## Other Changes: From aa3c567c0e68539cb225294a7ebefb64035daae3 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Tue, 30 May 2023 15:07:50 -0700 Subject: [PATCH 4/9] update tests --- test/test_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 4a22954361368..3c35d04741bcf 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -630,8 +630,7 @@ def test_is_in_or_equal(): assert is_in_or_equal("files/lion.jpg", "files/lion.jpg") assert is_in_or_equal("files/lion.jpg", "files") assert not is_in_or_equal("files", "files/lion.jpg") - assert is_in_or_equal(r"C:\files\lion.jpg", r"C:\files") - assert not is_in_or_equal(r"C:\files\..\..\C:\lion.jpg", r"C:\files") assert is_in_or_equal("/home/usr/notes.txt", "/home/usr/") assert is_in_or_equal("/home/usr/../usr/notes.txt", "/home/usr/") + assert is_in_or_equal("/home/usr/subdirectory", "/home/usr/notes.txt") assert not is_in_or_equal("/home/usr/../../etc/notes.txt", "/home/usr/") From aaebbb63ef9771a7093c60a96284e140ce38090d Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Tue, 30 May 2023 15:08:20 -0700 Subject: [PATCH 5/9] formatting --- test/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 3c35d04741bcf..0df5de8bfe258 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -31,6 +31,7 @@ format_ner_list, get_type_hints, ipython_check, + is_in_or_equal, is_special_typed_parameter, kaggle_check, readme_to_html, @@ -39,7 +40,6 @@ sanitize_value_for_csv, tex2svg, validate_url, - is_in_or_equal, ) os.environ["GRADIO_ANALYTICS_ENABLED"] = "False" From 934167ee5c1a5eed433fa906cf8f8c52f653f6f5 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Tue, 30 May 2023 15:11:56 -0700 Subject: [PATCH 6/9] file access --- test/test_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 0df5de8bfe258..8f08a516242e7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -631,6 +631,5 @@ def test_is_in_or_equal(): assert is_in_or_equal("files/lion.jpg", "files") assert not is_in_or_equal("files", "files/lion.jpg") assert is_in_or_equal("/home/usr/notes.txt", "/home/usr/") - assert is_in_or_equal("/home/usr/../usr/notes.txt", "/home/usr/") - assert is_in_or_equal("/home/usr/subdirectory", "/home/usr/notes.txt") + assert not is_in_or_equal("/home/usr/subdirectory", "/home/usr/notes.txt") assert not is_in_or_equal("/home/usr/../../etc/notes.txt", "/home/usr/") From 17680f825120a3402fa089b0bb46ef30685b63dc Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Wed, 31 May 2023 12:57:24 -0700 Subject: [PATCH 7/9] cleanup --- gradio/routes.py | 55 +++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/gradio/routes.py b/gradio/routes.py index 6f15e9e12ee58..7b5f12052bdf6 100644 --- a/gradio/routes.py +++ b/gradio/routes.py @@ -321,12 +321,18 @@ async def file(path_or_url: str, request: fastapi.Request): return RedirectResponse( url=path_or_url, status_code=status.HTTP_302_FOUND ) + abs_path = utils.abspath(path_or_url) + if not abs_path.exists(): + raise HTTPException(404, f"File not found: {path_or_url}.") + in_blocklist = any( utils.is_in_or_equal(abs_path, blocked_path) for blocked_path in blocks.blocked_paths ) - if in_blocklist or any(part.startswith(".") for part in abs_path.parts): + is_dotfile = any(part.startswith(".") for part in abs_path.parts) + is_dir = abs_path.is_dir() + if in_blocklist or is_dotfile or is_dir: raise HTTPException(403, f"File not allowed: {path_or_url}.") in_app_dir = utils.is_in_or_equal(abs_path, app.cwd) @@ -335,35 +341,26 @@ async def file(path_or_url: str, request: fastapi.Request): utils.is_in_or_equal(abs_path, allowed_path) for allowed_path in blocks.allowed_paths ) - was_uploaded = utils.abspath(app.uploaded_file_dir) in abs_path.parents - - if in_app_dir or created_by_app or in_allowlist or was_uploaded: - if not abs_path.exists(): - raise HTTPException(404, "File not found") - if abs_path.is_dir(): - raise HTTPException(403) - - range_val = request.headers.get("Range", "").strip() - if range_val.startswith("bytes=") and "-" in range_val: - range_val = range_val[6:] - start, end = range_val.split("-") - if start.isnumeric() and end.isnumeric(): - start = int(start) - end = int(end) - response = ranged_response.RangedFileResponse( - abs_path, - ranged_response.OpenRange(start, end), - dict(request.headers), - stat_result=os.stat(abs_path), - ) - return response - return FileResponse(abs_path, headers={"Accept-Ranges": "bytes"}) + was_uploaded = utils.is_in_or_equal(abs_path, app.uploaded_file_dir) - else: - raise HTTPException( - 403, - f"File cannot be fetched: {path_or_url}. All files must contained within the Gradio python app working directory, or be a temp file created by the Gradio python app.", - ) + if not (in_app_dir or created_by_app or in_allowlist or was_uploaded): + raise HTTPException(403, f"File not allowed: {path_or_url}.") + + range_val = request.headers.get("Range", "").strip() + if range_val.startswith("bytes=") and "-" in range_val: + range_val = range_val[6:] + start, end = range_val.split("-") + if start.isnumeric() and end.isnumeric(): + start = int(start) + end = int(end) + response = ranged_response.RangedFileResponse( + abs_path, + ranged_response.OpenRange(start, end), + dict(request.headers), + stat_result=os.stat(abs_path), + ) + return response + return FileResponse(abs_path, headers={"Accept-Ranges": "bytes"}) @app.get("/file/{path:path}", dependencies=[Depends(login_check)]) async def file_deprecated(path: str, request: fastapi.Request): From edfe1920018110ecbd6c4cb33be61dfbbeaa041e Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Wed, 31 May 2023 13:01:28 -0700 Subject: [PATCH 8/9] cleaning --- gradio/routes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gradio/routes.py b/gradio/routes.py index 4c8324cff2b69..ac51c8874101f 100644 --- a/gradio/routes.py +++ b/gradio/routes.py @@ -338,6 +338,7 @@ async def file(path_or_url: str, request: fastapi.Request): ) is_dotfile = any(part.startswith(".") for part in abs_path.parts) is_dir = abs_path.is_dir() + if in_blocklist or is_dotfile or is_dir: raise HTTPException(403, f"File not allowed: {path_or_url}.") From ab152fb98c0045175f0bde58e996edf33a3c05f3 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Wed, 31 May 2023 13:25:42 -0700 Subject: [PATCH 9/9] fix tests --- gradio/routes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gradio/routes.py b/gradio/routes.py index ac51c8874101f..8990ce9a28cb7 100644 --- a/gradio/routes.py +++ b/gradio/routes.py @@ -329,8 +329,6 @@ async def file(path_or_url: str, request: fastapi.Request): ) abs_path = utils.abspath(path_or_url) - if not abs_path.exists(): - raise HTTPException(404, f"File not found: {path_or_url}.") in_blocklist = any( utils.is_in_or_equal(abs_path, blocked_path) @@ -341,6 +339,8 @@ async def file(path_or_url: str, request: fastapi.Request): if in_blocklist or is_dotfile or is_dir: raise HTTPException(403, f"File not allowed: {path_or_url}.") + if not abs_path.exists(): + raise HTTPException(404, f"File not found: {path_or_url}.") in_app_dir = utils.is_in_or_equal(abs_path, app.cwd) created_by_app = str(abs_path) in set().union(*blocks.temp_file_sets)