From 79e11d04d64cd283547dc43bfd5a5022eca51603 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Mon, 29 Nov 2021 13:00:44 +0000 Subject: [PATCH 1/6] Fix media repository failing when media store path contains symlinks --- changelog.d/11445.bugfix | 1 + synapse/rest/media/v1/filepath.py | 118 +++++++++++++++++---------- tests/rest/media/v1/test_filepath.py | 39 +++++++++ 3 files changed, 114 insertions(+), 44 deletions(-) create mode 100644 changelog.d/11445.bugfix diff --git a/changelog.d/11445.bugfix b/changelog.d/11445.bugfix new file mode 100644 index 000000000000..fa5e055d502e --- /dev/null +++ b/changelog.d/11445.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in 1.47.1 where the media repository would fail to work if the media store path contained any symbolic links. diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index c0e15c65139d..ec7a258eeaef 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -43,47 +43,79 @@ def _wrapped(self: "MediaFilePaths", *args: Any, **kwargs: Any) -> str: ) -def _wrap_with_jail_check(func: GetPathMethod) -> GetPathMethod: +def _wrap_with_jail_check(relative: bool) -> Callable[[GetPathMethod], GetPathMethod]: """Wraps a path-returning method to check that the returned path(s) do not escape the media store directory. + The path-returning method may return either a single path, or a list of paths. + The check is not expected to ever fail, unless `func` is missing a call to `_validate_path_component`, or `_validate_path_component` is buggy. Args: - func: The `MediaFilePaths` method to wrap. The method may return either a single - path, or a list of paths. Returned paths may be either absolute or relative. + relative: A boolean indicating whether the wrapped method returns paths relative + to the media store directory. Returns: - The method, wrapped with a check to ensure that the returned path(s) lie within - the media store directory. Raises a `ValueError` if the check fails. + A method which will wrap a path-returning method, adding a check to ensure that + the returned path(s) lie within the media store directory. The check will raise + a `ValueError` if it fails. """ - @functools.wraps(func) - def _wrapped( - self: "MediaFilePaths", *args: Any, **kwargs: Any - ) -> Union[str, List[str]]: - path_or_paths = func(self, *args, **kwargs) - - if isinstance(path_or_paths, list): - paths_to_check = path_or_paths - else: - paths_to_check = [path_or_paths] - - for path in paths_to_check: - # path may be an absolute or relative path, depending on the method being - # wrapped. When "appending" an absolute path, `os.path.join` discards the - # previous path, which is desired here. - normalized_path = os.path.normpath(os.path.join(self.real_base_path, path)) - if ( - os.path.commonpath([normalized_path, self.real_base_path]) - != self.real_base_path - ): - raise ValueError(f"Invalid media store path: {path!r}") - - return path_or_paths - - return cast(GetPathMethod, _wrapped) + def _wrap_with_jail_check_inner(func: GetPathMethod) -> GetPathMethod: + @functools.wraps(func) + def _wrapped( + self: "MediaFilePaths", *args: Any, **kwargs: Any + ) -> Union[str, List[str]]: + path_or_paths = func(self, *args, **kwargs) + + if isinstance(path_or_paths, list): + paths_to_check = path_or_paths + else: + paths_to_check = [path_or_paths] + + for path in paths_to_check: + # Construct the path that will ultimately be used. + # We cannot guess whether `path` is relative to the media store + # directory, since the media store directory may itself be a relative + # path. + if relative: + normalized_path = os.path.normpath( + os.path.join(self.base_path, path) + ) + else: + normalized_path = os.path.normpath(path) + + normalized_base_path = os.path.normpath(self.base_path) + + # Check that the path lies within the media store directory. + # `os.path.commonpath` does not take `../`s into account and + # considers `a/b/c` and `a/b/c/../d` to have a common path of + # `a/b/c`, so we have to normalize the paths first. + # + # The normalization process has two issues: + # * `a/b/c/../c` will normalize to `a/b/c`, but the former refers to a + # different path if `a/b/c` is a symlink. `abspath` has the same + # issue. + # * A `base_path` of `.` will fail the check below. + # This configuration is exceedingly unlikely. + # + # As an alternative, `os.path.realpath` may be used. However it proves + # problematic if there are symlinks inside the media store. + # eg. if `url_store/` is symlinked to elsewhere, its canonical path + # won't match that of the main media store directory. + if ( + os.path.commonpath([normalized_path, normalized_base_path]) + != normalized_base_path + ): + # The path resolves to outside the media store directory. + raise ValueError(f"Invalid media store path: {path!r}") + + return path_or_paths + + return cast(GetPathMethod, _wrapped) + + return _wrap_with_jail_check_inner ALLOWED_CHARACTERS = set( @@ -128,9 +160,6 @@ class MediaFilePaths: def __init__(self, primary_base_path: str): self.base_path = primary_base_path - # The media store directory, with all symlinks resolved. - self.real_base_path = os.path.realpath(primary_base_path) - # Refuse to initialize if paths cannot be validated correctly for the current # platform. assert os.path.sep not in ALLOWED_CHARACTERS @@ -140,7 +169,7 @@ def __init__(self, primary_base_path: str): # for certain homeservers there, since ":"s aren't allowed in paths. assert os.name == "posix" - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=True) def local_media_filepath_rel(self, media_id: str) -> str: return os.path.join( "local_content", @@ -151,7 +180,7 @@ def local_media_filepath_rel(self, media_id: str) -> str: local_media_filepath = _wrap_in_base_path(local_media_filepath_rel) - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=True) def local_media_thumbnail_rel( self, media_id: str, width: int, height: int, content_type: str, method: str ) -> str: @@ -167,7 +196,7 @@ def local_media_thumbnail_rel( local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=False) def local_media_thumbnail_dir(self, media_id: str) -> str: """ Retrieve the local store path of thumbnails of a given media_id @@ -185,7 +214,7 @@ def local_media_thumbnail_dir(self, media_id: str) -> str: _validate_path_component(media_id[4:]), ) - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=True) def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str: return os.path.join( "remote_content", @@ -197,7 +226,7 @@ def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str: remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel) - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=True) def remote_media_thumbnail_rel( self, server_name: str, @@ -223,7 +252,7 @@ def remote_media_thumbnail_rel( # Legacy path that was used to store thumbnails previously. # Should be removed after some time, when most of the thumbnails are stored # using the new path. - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=True) def remote_media_thumbnail_rel_legacy( self, server_name: str, file_id: str, width: int, height: int, content_type: str ) -> str: @@ -238,6 +267,7 @@ def remote_media_thumbnail_rel_legacy( _validate_path_component(file_name), ) + @_wrap_with_jail_check(relative=False) def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str: return os.path.join( self.base_path, @@ -248,7 +278,7 @@ def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str: _validate_path_component(file_id[4:]), ) - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=True) def url_cache_filepath_rel(self, media_id: str) -> str: if NEW_FORMAT_ID_RE.match(media_id): # Media id is of the form @@ -268,7 +298,7 @@ def url_cache_filepath_rel(self, media_id: str) -> str: url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel) - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=False) def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]: "The dirs to try and remove if we delete the media_id file" if NEW_FORMAT_ID_RE.match(media_id): @@ -290,7 +320,7 @@ def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]: ), ] - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=True) def url_cache_thumbnail_rel( self, media_id: str, width: int, height: int, content_type: str, method: str ) -> str: @@ -318,7 +348,7 @@ def url_cache_thumbnail_rel( url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=True) def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf @@ -341,7 +371,7 @@ def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: url_cache_thumbnail_directory_rel ) - @_wrap_with_jail_check + @_wrap_with_jail_check(relative=False) def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]: "The dirs to try and remove if we delete the media_id thumbnails" # Media id is of the form diff --git a/tests/rest/media/v1/test_filepath.py b/tests/rest/media/v1/test_filepath.py index 8fe94f7d853c..cd3bf44f6dcc 100644 --- a/tests/rest/media/v1/test_filepath.py +++ b/tests/rest/media/v1/test_filepath.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import inspect +import os from typing import Iterable from synapse.rest.media.v1.filepath import MediaFilePaths @@ -486,3 +487,41 @@ def _test_path_validation( f"{value!r} unexpectedly passed validation: " f"{method} returned {path_or_list!r}" ) + + def test_symlink(self): + """Test that a symlink does not cause the jail check to fail.""" + media_store_path = self.mktemp() + + # symlink the media store directory + os.symlink("/mnt/synapse/media_store", media_store_path) + + # Test that relative and absolute paths don't trip the check + # NB: `media_store_path` is a relative path + filepaths = MediaFilePaths(media_store_path) + filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar") + filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar") + + filepaths = MediaFilePaths(os.path.abspath(media_store_path)) + filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar") + filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar") + + def test_symlink_subdirectory(self): + """Test that a symlinked subdirectory does not cause the jail check to fail.""" + media_store_path = self.mktemp() + os.mkdir(media_store_path) + + # symlink `url_cache/` + os.symlink( + "/mnt/synapse/media_store_url_cache", + os.path.join(media_store_path, "url_cache"), + ) + + # Test that relative and absolute paths don't trip the check + # NB: `media_store_path` is a relative path + filepaths = MediaFilePaths(media_store_path) + filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar") + filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar") + + filepaths = MediaFilePaths(os.path.abspath(media_store_path)) + filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar") + filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar") From 519821b60c15e39d4c50ba156005981050a32eba Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Mon, 29 Nov 2021 13:54:51 +0000 Subject: [PATCH 2/6] Fix newsfile --- changelog.d/{11445.bugfix => 11446.bugfix} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog.d/{11445.bugfix => 11446.bugfix} (100%) diff --git a/changelog.d/11445.bugfix b/changelog.d/11446.bugfix similarity index 100% rename from changelog.d/11445.bugfix rename to changelog.d/11446.bugfix From 9d166229ca299192123dd282669fd6ce86d4a0d7 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Tue, 30 Nov 2021 18:30:51 +0000 Subject: [PATCH 3/6] Normalize base_path once --- synapse/rest/media/v1/filepath.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index ec7a258eeaef..0fca14bb761c 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -86,8 +86,6 @@ def _wrapped( else: normalized_path = os.path.normpath(path) - normalized_base_path = os.path.normpath(self.base_path) - # Check that the path lies within the media store directory. # `os.path.commonpath` does not take `../`s into account and # considers `a/b/c` and `a/b/c/../d` to have a common path of @@ -105,8 +103,8 @@ def _wrapped( # eg. if `url_store/` is symlinked to elsewhere, its canonical path # won't match that of the main media store directory. if ( - os.path.commonpath([normalized_path, normalized_base_path]) - != normalized_base_path + os.path.commonpath([normalized_path, self.normalized_base_path]) + != self.normalized_base_path ): # The path resolves to outside the media store directory. raise ValueError(f"Invalid media store path: {path!r}") @@ -159,6 +157,7 @@ class MediaFilePaths: def __init__(self, primary_base_path: str): self.base_path = primary_base_path + self.normalized_base_path = os.path.normpath(self.base_path) # Refuse to initialize if paths cannot be validated correctly for the current # platform. From 89337741b81f70a062efc8b5d1b434f90efd07e8 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 30 Nov 2021 18:35:11 +0000 Subject: [PATCH 4/6] Update synapse/rest/media/v1/filepath.py Co-authored-by: Patrick Cloke --- synapse/rest/media/v1/filepath.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 0fca14bb761c..f9b20c7f49c2 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -80,11 +80,8 @@ def _wrapped( # directory, since the media store directory may itself be a relative # path. if relative: - normalized_path = os.path.normpath( - os.path.join(self.base_path, path) - ) - else: - normalized_path = os.path.normpath(path) + path = os.path.join(self.base_path, path) + normalized_path = os.path.normpath(path) # Check that the path lies within the media store directory. # `os.path.commonpath` does not take `../`s into account and From eaf8069eaa9440c89a25114166cc3a17da9768df Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Wed, 1 Dec 2021 19:42:58 +0000 Subject: [PATCH 5/6] Add more comprehensive tests for the jail decorator --- tests/rest/media/v1/test_filepath.py | 90 ++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 11 deletions(-) diff --git a/tests/rest/media/v1/test_filepath.py b/tests/rest/media/v1/test_filepath.py index cd3bf44f6dcc..913bc530aac1 100644 --- a/tests/rest/media/v1/test_filepath.py +++ b/tests/rest/media/v1/test_filepath.py @@ -15,7 +15,7 @@ import os from typing import Iterable -from synapse.rest.media.v1.filepath import MediaFilePaths +from synapse.rest.media.v1.filepath import MediaFilePaths, _wrap_with_jail_check from tests import unittest @@ -488,7 +488,75 @@ def _test_path_validation( f"{method} returned {path_or_list!r}" ) - def test_symlink(self): + +class MediaFilePathsJailTestCase(unittest.TestCase): + def _check_relative_path(self, filepaths: MediaFilePaths, path: str) -> None: + """Passes a relative path through the jail check. + + Args: + filepaths: The `MediaFilePaths` instance. + path: A path relative to the media store directory. + + Raises: + ValueError: If the jail check fails. + """ + + @_wrap_with_jail_check(relative=True) + def _make_relative_path(self: MediaFilePaths, path: str) -> str: + return path + + _make_relative_path(filepaths, path) + + def _check_absolute_path(self, filepaths: MediaFilePaths, path: str) -> None: + """Passes an absolute path through the jail check. + + Args: + filepaths: The `MediaFilePaths` instance. + path: A path relative to the media store directory. + + Raises: + ValueError: If the jail check fails. + """ + + @_wrap_with_jail_check(relative=False) + def _make_absolute_path(self: MediaFilePaths, path: str) -> str: + return os.path.join(self.base_path, path) + + _make_absolute_path(filepaths, path) + + def test_traversal_inside(self) -> None: + """Test the jail check for paths that stay within the media directory.""" + # Despite the `../`s, these paths still lie within the media directory and it's + # expected for the jail check to allow them through. + # These paths ought to trip the other checks in place and should never be + # returned. + filepaths = MediaFilePaths("/media_store") + path = "url_cache/2020-01-02/../../GerZNDnDZVjsOtar" + self._check_relative_path(filepaths, path) + self._check_absolute_path(filepaths, path) + + def test_traversal_outside(self) -> None: + """Test that the jail check fails for paths that escape the media directory.""" + filepaths = MediaFilePaths("/media_store") + path = "url_cache/2020-01-02/../../../GerZNDnDZVjsOtar" + with self.assertRaises(ValueError): + self._check_relative_path(filepaths, path) + with self.assertRaises(ValueError): + self._check_absolute_path(filepaths, path) + + def test_traversal_reentry(self) -> None: + """Test the jail check for paths that exit and re-enter the media directory.""" + # These paths lie outside the media directory if it is a symlink, and inside + # otherwise. Ideally the check should fail, but this proves difficult. + # This test documents the behaviour for this edge case. + # These paths ought to trip the other checks in place and should never be + # returned. + filepaths = MediaFilePaths("/media_store") + path = "url_cache/2020-01-02/../../../media_store/GerZNDnDZVjsOtar" + self._check_relative_path(filepaths, path) + self._check_absolute_path(filepaths, path) + + def test_symlink(self) -> None: """Test that a symlink does not cause the jail check to fail.""" media_store_path = self.mktemp() @@ -498,14 +566,14 @@ def test_symlink(self): # Test that relative and absolute paths don't trip the check # NB: `media_store_path` is a relative path filepaths = MediaFilePaths(media_store_path) - filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar") - filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar") + self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar") + self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar") filepaths = MediaFilePaths(os.path.abspath(media_store_path)) - filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar") - filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar") + self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar") + self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar") - def test_symlink_subdirectory(self): + def test_symlink_subdirectory(self) -> None: """Test that a symlinked subdirectory does not cause the jail check to fail.""" media_store_path = self.mktemp() os.mkdir(media_store_path) @@ -519,9 +587,9 @@ def test_symlink_subdirectory(self): # Test that relative and absolute paths don't trip the check # NB: `media_store_path` is a relative path filepaths = MediaFilePaths(media_store_path) - filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar") - filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar") + self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar") + self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar") filepaths = MediaFilePaths(os.path.abspath(media_store_path)) - filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar") - filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar") + self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar") + self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar") From 00ea5abe54bb59678465730d5ad37ba9709e65ff Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Wed, 1 Dec 2021 21:31:58 +0000 Subject: [PATCH 6/6] Reword comments --- synapse/rest/media/v1/filepath.py | 35 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index f9b20c7f49c2..1f6441c412d0 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -83,29 +83,30 @@ def _wrapped( path = os.path.join(self.base_path, path) normalized_path = os.path.normpath(path) - # Check that the path lies within the media store directory. - # `os.path.commonpath` does not take `../`s into account and - # considers `a/b/c` and `a/b/c/../d` to have a common path of - # `a/b/c`, so we have to normalize the paths first. - # - # The normalization process has two issues: - # * `a/b/c/../c` will normalize to `a/b/c`, but the former refers to a - # different path if `a/b/c` is a symlink. `abspath` has the same - # issue. - # * A `base_path` of `.` will fail the check below. - # This configuration is exceedingly unlikely. - # - # As an alternative, `os.path.realpath` may be used. However it proves - # problematic if there are symlinks inside the media store. - # eg. if `url_store/` is symlinked to elsewhere, its canonical path - # won't match that of the main media store directory. + # Now that `normpath` has eliminated `../`s and `./`s from the path, + # `os.path.commonpath` can be used to check whether it lies within the + # media store directory. if ( os.path.commonpath([normalized_path, self.normalized_base_path]) != self.normalized_base_path ): - # The path resolves to outside the media store directory. + # The path resolves to outside the media store directory, + # or `self.base_path` is `.`, which is an unlikely configuration. raise ValueError(f"Invalid media store path: {path!r}") + # Note that `os.path.normpath`/`abspath` has a subtle caveat: + # `a/b/c/../c` will normalize to `a/b/c`, but the former refers to a + # different path if `a/b/c` is a symlink. That is, the check above is + # not perfect and may allow a certain restricted subset of untrustworthy + # paths through. Since the check above is secondary to the main + # `_validate_path_component` checks, it's less important for it to be + # perfect. + # + # As an alternative, `os.path.realpath` will resolve symlinks, but + # proves problematic if there are symlinks inside the media store. + # eg. if `url_store/` is symlinked to elsewhere, its canonical path + # won't match that of the main media store directory. + return path_or_paths return cast(GetPathMethod, _wrapped)