Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix media repository failing when media store path contains symlinks #11446

Merged
merged 6 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/11445.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug introduced in 1.47.1 where the media repository would fail to work if the media store path contained any symbolic links.
118 changes: 74 additions & 44 deletions synapse/rest/media/v1/filepath.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,47 +43,79 @@ def _wrapped(self: "MediaFilePaths", *args: Any, **kwargs: Any) -> str:
)


def _wrap_with_jail_check(func: GetPathMethod) -> GetPathMethod:
def _wrap_with_jail_check(relative: bool) -> Callable[[GetPathMethod], GetPathMethod]:
"""Wraps a path-returning method to check that the returned path(s) do not escape
the media store directory.

The path-returning method may return either a single path, or a list of paths.

The check is not expected to ever fail, unless `func` is missing a call to
`_validate_path_component`, or `_validate_path_component` is buggy.

Args:
func: The `MediaFilePaths` method to wrap. The method may return either a single
path, or a list of paths. Returned paths may be either absolute or relative.
relative: A boolean indicating whether the wrapped method returns paths relative
to the media store directory.

Returns:
The method, wrapped with a check to ensure that the returned path(s) lie within
the media store directory. Raises a `ValueError` if the check fails.
A method which will wrap a path-returning method, adding a check to ensure that
the returned path(s) lie within the media store directory. The check will raise
a `ValueError` if it fails.
"""

@functools.wraps(func)
def _wrapped(
self: "MediaFilePaths", *args: Any, **kwargs: Any
) -> Union[str, List[str]]:
path_or_paths = func(self, *args, **kwargs)

if isinstance(path_or_paths, list):
paths_to_check = path_or_paths
else:
paths_to_check = [path_or_paths]

for path in paths_to_check:
# path may be an absolute or relative path, depending on the method being
# wrapped. When "appending" an absolute path, `os.path.join` discards the
# previous path, which is desired here.
normalized_path = os.path.normpath(os.path.join(self.real_base_path, path))
if (
os.path.commonpath([normalized_path, self.real_base_path])
!= self.real_base_path
):
Comment on lines -77 to -81
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

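The bug was that `path` was an absolute, un-canonicalized path, so `normalized_path` was un-canonicalized while `self.real_base_path` was canonicalized, and the two could never share a common prefix when the media store path contained a symlink.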

raise ValueError(f"Invalid media store path: {path!r}")

return path_or_paths

return cast(GetPathMethod, _wrapped)
def _wrap_with_jail_check_inner(func: GetPathMethod) -> GetPathMethod:
@functools.wraps(func)
def _wrapped(
self: "MediaFilePaths", *args: Any, **kwargs: Any
) -> Union[str, List[str]]:
path_or_paths = func(self, *args, **kwargs)

if isinstance(path_or_paths, list):
paths_to_check = path_or_paths
else:
paths_to_check = [path_or_paths]

for path in paths_to_check:
# Construct the path that will ultimately be used.
# We cannot guess whether `path` is relative to the media store
# directory, since the media store directory may itself be a relative
# path.
if relative:
normalized_path = os.path.normpath(
os.path.join(self.base_path, path)
)
else:
normalized_path = os.path.normpath(path)
squahtx marked this conversation as resolved.
Show resolved Hide resolved

normalized_base_path = os.path.normpath(self.base_path)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a big deal, but should this be done outside this function, once? (Or maybe that precludes someone from modifying a symlink while synapse is running, but that sounds like a bad idea anyway.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's possible to normalize only once. normpath is completely independent from the filesystem as far as I know. It's only realpath that resolves symlinks.


# Check that the path lies within the media store directory.
# `os.path.commonpath` does not take `../`s into account and
# considers `a/b/c` and `a/b/c/../d` to have a common path of
# `a/b/c`, so we have to normalize the paths first.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment makes it sound like we can't use commonpath, but then we do use it below... do we need to do something about these issues?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It sounds like the comment could do with rewording. It was intended to explain why we use normpath before commonpath.

As for the normpath issue below, I'm really struggling to find a way to resolve it.

As an example, if you have a media store at /var/lib/synapse, symlinked to /mnt/media, then /var/lib/synapse/../synapse/some_file refers to /mnt/synapse/some_file and not /mnt/media/some_file.
normpath will elide the .. and make the path look like it's legitimate.

realpath would resolve the path correctly, but then server admins who have symlinks within the media store directory would have problems, eg. if remote_content was symlinked elsewhere.

Since this check is a last resort and not the check for path traversal, I opted to be permissive and let the issue slide.

(todo tomorrow: revisit comments)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it is reasonable to not allow relative paths or not allow symlinks somewhere in here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, it'd be nice to not allow symlinks within the media store. On the other hand, I can definitely imagine a server admin wanting to offload one of the big media subdirectories elsewhere and expecting a symlink to work.

As for relative paths (paths containing .. or .), in theory we shouldn't encounter any if the main checks work. Then again this decorator is wholly unnecessary if the main checks work.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've tried to reword the comments. Hopefully they are less confusing now.

#
# The normalization process has two issues:
# * `a/b/c/../c` will normalize to `a/b/c`, but the former refers to a
# different path if `a/b/c` is a symlink. `abspath` has the same
# issue.
# * A `base_path` of `.` will fail the check below.
# This configuration is exceedingly unlikely.
#
# As an alternative, `os.path.realpath` may be used. However it proves
# problematic if there are symlinks inside the media store.
# eg. if `url_store/` is symlinked to elsewhere, its canonical path
# won't match that of the main media store directory.
if (
os.path.commonpath([normalized_path, normalized_base_path])
!= normalized_base_path
):
# The path resolves to outside the media store directory.
raise ValueError(f"Invalid media store path: {path!r}")

return path_or_paths

return cast(GetPathMethod, _wrapped)

return _wrap_with_jail_check_inner


ALLOWED_CHARACTERS = set(
Expand Down Expand Up @@ -128,9 +160,6 @@ class MediaFilePaths:
def __init__(self, primary_base_path: str):
self.base_path = primary_base_path

# The media store directory, with all symlinks resolved.
self.real_base_path = os.path.realpath(primary_base_path)

# Refuse to initialize if paths cannot be validated correctly for the current
# platform.
assert os.path.sep not in ALLOWED_CHARACTERS
Expand All @@ -140,7 +169,7 @@ def __init__(self, primary_base_path: str):
# for certain homeservers there, since ":"s aren't allowed in paths.
assert os.name == "posix"

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=True)
def local_media_filepath_rel(self, media_id: str) -> str:
return os.path.join(
"local_content",
Expand All @@ -151,7 +180,7 @@ def local_media_filepath_rel(self, media_id: str) -> str:

local_media_filepath = _wrap_in_base_path(local_media_filepath_rel)

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=True)
def local_media_thumbnail_rel(
self, media_id: str, width: int, height: int, content_type: str, method: str
) -> str:
Expand All @@ -167,7 +196,7 @@ def local_media_thumbnail_rel(

local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=False)
def local_media_thumbnail_dir(self, media_id: str) -> str:
"""
Retrieve the local store path of thumbnails of a given media_id
Expand All @@ -185,7 +214,7 @@ def local_media_thumbnail_dir(self, media_id: str) -> str:
_validate_path_component(media_id[4:]),
)

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=True)
def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str:
return os.path.join(
"remote_content",
Expand All @@ -197,7 +226,7 @@ def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str:

remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel)

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=True)
def remote_media_thumbnail_rel(
self,
server_name: str,
Expand All @@ -223,7 +252,7 @@ def remote_media_thumbnail_rel(
# Legacy path that was used to store thumbnails previously.
# Should be removed after some time, when most of the thumbnails are stored
# using the new path.
@_wrap_with_jail_check
@_wrap_with_jail_check(relative=True)
def remote_media_thumbnail_rel_legacy(
self, server_name: str, file_id: str, width: int, height: int, content_type: str
) -> str:
Expand All @@ -238,6 +267,7 @@ def remote_media_thumbnail_rel_legacy(
_validate_path_component(file_name),
)

@_wrap_with_jail_check(relative=False)
def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str:
return os.path.join(
self.base_path,
Expand All @@ -248,7 +278,7 @@ def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str:
_validate_path_component(file_id[4:]),
)

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=True)
def url_cache_filepath_rel(self, media_id: str) -> str:
if NEW_FORMAT_ID_RE.match(media_id):
# Media id is of the form <DATE><RANDOM_STRING>
Expand All @@ -268,7 +298,7 @@ def url_cache_filepath_rel(self, media_id: str) -> str:

url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel)

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=False)
def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]:
"The dirs to try and remove if we delete the media_id file"
if NEW_FORMAT_ID_RE.match(media_id):
Expand All @@ -290,7 +320,7 @@ def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]:
),
]

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=True)
def url_cache_thumbnail_rel(
self, media_id: str, width: int, height: int, content_type: str, method: str
) -> str:
Expand Down Expand Up @@ -318,7 +348,7 @@ def url_cache_thumbnail_rel(

url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=True)
def url_cache_thumbnail_directory_rel(self, media_id: str) -> str:
# Media id is of the form <DATE><RANDOM_STRING>
# E.g.: 2017-09-28-fsdRDt24DS234dsf
Expand All @@ -341,7 +371,7 @@ def url_cache_thumbnail_directory_rel(self, media_id: str) -> str:
url_cache_thumbnail_directory_rel
)

@_wrap_with_jail_check
@_wrap_with_jail_check(relative=False)
def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
"The dirs to try and remove if we delete the media_id thumbnails"
# Media id is of the form <DATE><RANDOM_STRING>
Expand Down
39 changes: 39 additions & 0 deletions tests/rest/media/v1/test_filepath.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import os
from typing import Iterable

from synapse.rest.media.v1.filepath import MediaFilePaths
Expand Down Expand Up @@ -486,3 +487,41 @@ def _test_path_validation(
f"{value!r} unexpectedly passed validation: "
f"{method} returned {path_or_list!r}"
)

def test_symlink(self):
clokep marked this conversation as resolved.
Show resolved Hide resolved
"""Test that a symlink does not cause the jail check to fail."""
media_store_path = self.mktemp()

# symlink the media store directory
os.symlink("/mnt/synapse/media_store", media_store_path)

# Test that relative and absolute paths don't trip the check
# NB: `media_store_path` is a relative path
filepaths = MediaFilePaths(media_store_path)
filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar")
filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar")

filepaths = MediaFilePaths(os.path.abspath(media_store_path))
filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar")
filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar")

def test_symlink_subdirectory(self):
"""Test that a symlinked subdirectory does not cause the jail check to fail."""
media_store_path = self.mktemp()
os.mkdir(media_store_path)

# symlink `url_cache/`
os.symlink(
"/mnt/synapse/media_store_url_cache",
os.path.join(media_store_path, "url_cache"),
)

# Test that relative and absolute paths don't trip the check
# NB: `media_store_path` is a relative path
filepaths = MediaFilePaths(media_store_path)
filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar")
filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar")

filepaths = MediaFilePaths(os.path.abspath(media_store_path))
filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar")
filepaths.url_cache_filepath_dirs_to_delete("2020-01-02_GerZNDnDZVjsOtar")