From 37967617bd97615fb6f3b44e7750c0e0be58479a Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Mon, 5 Jun 2023 12:51:33 -0500 Subject: [PATCH] Restrict proxy URLs (#4406) * restrict proxy urls * fix test * changelog * fix tests * Update gradio/routes.py Co-authored-by: Aarni Koskela * Update gradio/routes.py Co-authored-by: Aarni Koskela * fix test * docstring * bring back hf url * fix tests * Update gradio/blocks.py Co-authored-by: Aarni Koskela * Update gradio/blocks.py Co-authored-by: Aarni Koskela * fix tests * nit * fix test * format --------- Co-authored-by: Aarni Koskela --- CHANGELOG.md | 2 +- gradio/blocks.py | 18 ++++++++++++++---- gradio/routes.py | 16 +++++++++++++--- test/test_blocks.py | 8 +++++++- test/test_routes.py | 29 +++++++++++++++++++++++++++-- 5 files changed, 62 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b2c9daf61c2f..23c543900d146 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ No changes to highlight. ## Bug Fixes: -No changes to highlight. +- Restricts the domains that can be proxied via `/proxy` route by [@abidlabs](https://github.com/abidlabs) in [PR 4406](https://github.com/gradio-app/gradio/pull/4406). ## Other Changes: diff --git a/gradio/blocks.py b/gradio/blocks.py index e0fb45e739ef6..9cdb1a46bc5dc 100644 --- a/gradio/blocks.py +++ b/gradio/blocks.py @@ -738,6 +738,7 @@ def __init__( self.allowed_paths = [] self.blocked_paths = [] self.root_path = "" + self.root_urls = set() if self.analytics_enabled: is_custom_theme = not any( @@ -758,20 +759,22 @@ def from_config( cls, config: dict, fns: list[Callable], - root_url: str | None = None, + root_url: str, ) -> Blocks: """ - Factory method that creates a Blocks from a config and list of functions. + Factory method that creates a Blocks from a config and list of functions. Used + internally by the gradio.external.load() method. Parameters: config: a dictionary containing the configuration of the Blocks. fns: a list of functions that are used in the Blocks. Must be in the same order as the dependencies in the config. - root_url: an optional root url to use for the components in the Blocks. Allows serving files from an external URL. + root_url: an external url to use as a root URL when serving files for components in the Blocks. """ config = copy.deepcopy(config) components_config = config["components"] theme = config.get("theme", "default") original_mapping: dict[int, Block] = {} + root_urls = {root_url} def get_block_instance(id: int) -> Block: for block_config in components_config: @@ -783,8 +786,13 @@ def get_block_instance(id: int) -> Block: block_config["props"].pop("type", None) block_config["props"].pop("name", None) style = block_config["props"].pop("style", None) - if block_config["props"].get("root_url") is None and root_url: + # If a Gradio app B is loaded into a Gradio app A, and B itself loads a + # Gradio app C, then the root_urls of the components in A need to be the + # URL of C, not B. The else clause below handles this case. + if block_config["props"].get("root_url") is None: block_config["props"]["root_url"] = f"{root_url}/" + else: + root_urls.add(block_config["props"]["root_url"]) # Any component has already processed its initial value, so we skip that step here block = cls(**block_config["props"], _skip_init_processing=True) if style and isinstance(block, components.IOComponent): @@ -860,6 +868,7 @@ def iterate_over_children(children_list): blocks.__name__ = "Interface" blocks.api_mode = True + blocks.root_urls = root_urls return blocks def __str__(self): @@ -935,6 +944,7 @@ def render(self): Context.root_block.fns[dependency_offset + i] = new_fn Context.root_block.dependencies.append(dependency) Context.root_block.temp_file_sets.extend(self.temp_file_sets) + Context.root_block.root_urls.update(self.root_urls) if Context.block is not None: Context.block.children.extend(self.children) diff --git a/gradio/routes.py b/gradio/routes.py index 03c0d878cec47..00df98d7d69e5 100644 --- a/gradio/routes.py +++ b/gradio/routes.py @@ -145,9 +145,16 @@ def get_blocks(self) -> gradio.Blocks: raise ValueError("No Blocks has been configured for this app.") return self.blocks - @staticmethod - def build_proxy_request(url_path): + def build_proxy_request(self, url_path): url = httpx.URL(url_path) + assert self.blocks + # Don't proxy a URL unless it's a URL specifically loaded by the user using + # gr.load() to prevent SSRF or harvesting of HF tokens by malicious Spaces. + is_safe_url = any( + url.host == httpx.URL(root).host for root in self.blocks.root_urls + ) + if not is_safe_url: + raise PermissionError("This URL cannot be proxied.") is_hf_url = url.host.endswith(".hf.space") headers = {} if Context.hf_token is not None and is_hf_url: @@ -311,7 +318,10 @@ async def favicon(): @app.get("/proxy={url_path:path}", dependencies=[Depends(login_check)]) async def reverse_proxy(url_path: str): # Adapted from: https://github.com/tiangolo/fastapi/issues/1788 - rp_req = app.build_proxy_request(url_path) + try: + rp_req = app.build_proxy_request(url_path) + except PermissionError as err: + raise HTTPException(status_code=400, detail=str(err)) from err rp_resp = await client.send(rp_req, stream=True) return StreamingResponse( rp_resp.aiter_raw(), diff --git a/test/test_blocks.py b/test/test_blocks.py index 5cae83a4c2016..09761dc05adfb 100644 --- a/test/test_blocks.py +++ b/test/test_blocks.py @@ -137,6 +137,8 @@ def ct_model(diseases, img): assert demo.config["show_api"] is False def test_load_from_config(self): + fake_url = "https://fake.hf.space" + def update(name): return f"Welcome to Gradio, {name}!" @@ -149,8 +151,12 @@ def update(name): gr.Image().style(height=54, width=240) config1 = demo1.get_config_file() - demo2 = gr.Blocks.from_config(config1, [update]) + demo2 = gr.Blocks.from_config(config1, [update], "https://fake.hf.space") + + for component in config1["components"]: + component["props"]["root_url"] = f"{fake_url}/" config2 = demo2.get_config_file() + assert assert_configs_are_equivalent_besides_ids(config1, config2) def test_partial_fn_in_config(self): diff --git a/test/test_routes.py b/test/test_routes.py index 8d9e42c474303..4e2207888cd89 100644 --- a/test/test_routes.py +++ b/test/test_routes.py @@ -398,13 +398,38 @@ def test_mount_gradio_app_raises_error_if_event_queued_but_queue_disabled(self): demo.close() + def test_proxy_route_is_restricted_to_load_urls(self): + gr.context.Context.hf_token = "abcdef" + app = routes.App() + interface = gr.Interface(lambda x: x, "text", "text") + app.configure_app(interface) + with pytest.raises(PermissionError): + app.build_proxy_request( + "https://gradio-tests-test-loading-examples-private.hf.space/file=Bunny.obj" + ) + with pytest.raises(PermissionError): + app.build_proxy_request("https://google.com") + interface.root_urls = { + "https://gradio-tests-test-loading-examples-private.hf.space" + } + app.build_proxy_request( + "https://gradio-tests-test-loading-examples-private.hf.space/file=Bunny.obj" + ) + def test_proxy_does_not_leak_hf_token_externally(self): gr.context.Context.hf_token = "abcdef" - r = routes.App.build_proxy_request( + app = routes.App() + interface = gr.Interface(lambda x: x, "text", "text") + interface.root_urls = { + "https://gradio-tests-test-loading-examples-private.hf.space", + "https://google.com", + } + app.configure_app(interface) + r = app.build_proxy_request( "https://gradio-tests-test-loading-examples-private.hf.space/file=Bunny.obj" ) assert "authorization" in dict(r.headers) - r = routes.App.build_proxy_request("https://google.com") + r = app.build_proxy_request("https://google.com") assert "authorization" not in dict(r.headers)