-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Fix media repository failing when media store path contains symlinks #11446
Changes from 1 commit
79e11d0
519821b
9d16622
8933774
eaf8069
00ea5ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Fix a bug introduced in 1.47.1 where the media repository would fail to work if the media store path contained any symbolic links. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,47 +43,79 @@ def _wrapped(self: "MediaFilePaths", *args: Any, **kwargs: Any) -> str: | |
) | ||
|
||
|
||
def _wrap_with_jail_check(func: GetPathMethod) -> GetPathMethod: | ||
def _wrap_with_jail_check(relative: bool) -> Callable[[GetPathMethod], GetPathMethod]: | ||
"""Wraps a path-returning method to check that the returned path(s) do not escape | ||
the media store directory. | ||
|
||
The path-returning method may return either a single path, or a list of paths. | ||
|
||
The check is not expected to ever fail, unless `func` is missing a call to | ||
`_validate_path_component`, or `_validate_path_component` is buggy. | ||
|
||
Args: | ||
func: The `MediaFilePaths` method to wrap. The method may return either a single | ||
path, or a list of paths. Returned paths may be either absolute or relative. | ||
relative: A boolean indicating whether the wrapped method returns paths relative | ||
to the media store directory. | ||
|
||
Returns: | ||
The method, wrapped with a check to ensure that the returned path(s) lie within | ||
the media store directory. Raises a `ValueError` if the check fails. | ||
A method which will wrap a path-returning method, adding a check to ensure that | ||
the returned path(s) lie within the media store directory. The check will raise | ||
a `ValueError` if it fails. | ||
""" | ||
|
||
@functools.wraps(func) | ||
def _wrapped( | ||
self: "MediaFilePaths", *args: Any, **kwargs: Any | ||
) -> Union[str, List[str]]: | ||
path_or_paths = func(self, *args, **kwargs) | ||
|
||
if isinstance(path_or_paths, list): | ||
paths_to_check = path_or_paths | ||
else: | ||
paths_to_check = [path_or_paths] | ||
|
||
for path in paths_to_check: | ||
# path may be an absolute or relative path, depending on the method being | ||
# wrapped. When "appending" an absolute path, `os.path.join` discards the | ||
# previous path, which is desired here. | ||
normalized_path = os.path.normpath(os.path.join(self.real_base_path, path)) | ||
if ( | ||
os.path.commonpath([normalized_path, self.real_base_path]) | ||
!= self.real_base_path | ||
): | ||
raise ValueError(f"Invalid media store path: {path!r}") | ||
|
||
return path_or_paths | ||
|
||
return cast(GetPathMethod, _wrapped) | ||
def _wrap_with_jail_check_inner(func: GetPathMethod) -> GetPathMethod: | ||
@functools.wraps(func) | ||
def _wrapped( | ||
self: "MediaFilePaths", *args: Any, **kwargs: Any | ||
) -> Union[str, List[str]]: | ||
path_or_paths = func(self, *args, **kwargs) | ||
|
||
if isinstance(path_or_paths, list): | ||
paths_to_check = path_or_paths | ||
else: | ||
paths_to_check = [path_or_paths] | ||
|
||
for path in paths_to_check: | ||
# Construct the path that will ultimately be used. | ||
# We cannot guess whether `path` is relative to the media store | ||
# directory, since the media store directory may itself be a relative | ||
# path. | ||
if relative: | ||
normalized_path = os.path.normpath( | ||
os.path.join(self.base_path, path) | ||
) | ||
else: | ||
normalized_path = os.path.normpath(path) | ||
squahtx marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
normalized_base_path = os.path.normpath(self.base_path) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a big deal, but should this be done outside this function, once? (Or maybe that precludes someone from modifying a symlink while synapse is running, but that sounds like a bad idea anyway.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it's possible to normalize only once. |
||
|
||
# Check that the path lies within the media store directory. | ||
# `os.path.commonpath` does not take `../`s into account and | ||
# considers `a/b/c` and `a/b/c/../d` to have a common path of | ||
# `a/b/c`, so we have to normalize the paths first. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment makes it sound like we an't use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It sounds like the comment could do with rewording. It was intended to explain why we use As for the As an example, if you have a media store at
Since this check is a last resort and not the check for path traversal, I opted to be permissive and let the issue slide. (todo tomorrow: revisit comments) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if it is reasonable to not allow relative paths or not allow symlinks somewhere in here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, it'd be nice to not allow symlinks within the media store. On the other hand, I can definitely imagine a server admin wanting to offload one of the big media subdirectories elsewhere and expecting a symlink to work. As for relative paths (paths containing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've tried to reword the comments. Hopefully they are less confusing |
||
# | ||
# The normalization process has two issues: | ||
# * `a/b/c/../c` will normalize to `a/b/c`, but the former refers to a | ||
# different path if `a/b/c` is a symlink. `abspath` has the same | ||
# issue. | ||
# * A `base_path` of `.` will fail the check below. | ||
# This configuration is exceedingly unlikely. | ||
# | ||
# As an alternative, `os.path.realpath` may be used. However it proves | ||
# problematic if there are symlinks inside the media store. | ||
# eg. if `url_store/` is symlinked to elsewhere, its canonical path | ||
# won't match that of the main media store directory. | ||
if ( | ||
os.path.commonpath([normalized_path, normalized_base_path]) | ||
!= normalized_base_path | ||
): | ||
# The path resolves to outside the media store directory. | ||
raise ValueError(f"Invalid media store path: {path!r}") | ||
|
||
return path_or_paths | ||
|
||
return cast(GetPathMethod, _wrapped) | ||
|
||
return _wrap_with_jail_check_inner | ||
|
||
|
||
ALLOWED_CHARACTERS = set( | ||
|
@@ -128,9 +160,6 @@ class MediaFilePaths: | |
def __init__(self, primary_base_path: str): | ||
self.base_path = primary_base_path | ||
|
||
# The media store directory, with all symlinks resolved. | ||
self.real_base_path = os.path.realpath(primary_base_path) | ||
|
||
# Refuse to initialize if paths cannot be validated correctly for the current | ||
# platform. | ||
assert os.path.sep not in ALLOWED_CHARACTERS | ||
|
@@ -140,7 +169,7 @@ def __init__(self, primary_base_path: str): | |
# for certain homeservers there, since ":"s aren't allowed in paths. | ||
assert os.name == "posix" | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=True) | ||
def local_media_filepath_rel(self, media_id: str) -> str: | ||
return os.path.join( | ||
"local_content", | ||
|
@@ -151,7 +180,7 @@ def local_media_filepath_rel(self, media_id: str) -> str: | |
|
||
local_media_filepath = _wrap_in_base_path(local_media_filepath_rel) | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=True) | ||
def local_media_thumbnail_rel( | ||
self, media_id: str, width: int, height: int, content_type: str, method: str | ||
) -> str: | ||
|
@@ -167,7 +196,7 @@ def local_media_thumbnail_rel( | |
|
||
local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=False) | ||
def local_media_thumbnail_dir(self, media_id: str) -> str: | ||
""" | ||
Retrieve the local store path of thumbnails of a given media_id | ||
|
@@ -185,7 +214,7 @@ def local_media_thumbnail_dir(self, media_id: str) -> str: | |
_validate_path_component(media_id[4:]), | ||
) | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=True) | ||
def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str: | ||
return os.path.join( | ||
"remote_content", | ||
|
@@ -197,7 +226,7 @@ def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str: | |
|
||
remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel) | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=True) | ||
def remote_media_thumbnail_rel( | ||
self, | ||
server_name: str, | ||
|
@@ -223,7 +252,7 @@ def remote_media_thumbnail_rel( | |
# Legacy path that was used to store thumbnails previously. | ||
# Should be removed after some time, when most of the thumbnails are stored | ||
# using the new path. | ||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=True) | ||
def remote_media_thumbnail_rel_legacy( | ||
self, server_name: str, file_id: str, width: int, height: int, content_type: str | ||
) -> str: | ||
|
@@ -238,6 +267,7 @@ def remote_media_thumbnail_rel_legacy( | |
_validate_path_component(file_name), | ||
) | ||
|
||
@_wrap_with_jail_check(relative=False) | ||
def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str: | ||
return os.path.join( | ||
self.base_path, | ||
|
@@ -248,7 +278,7 @@ def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str: | |
_validate_path_component(file_id[4:]), | ||
) | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=True) | ||
def url_cache_filepath_rel(self, media_id: str) -> str: | ||
if NEW_FORMAT_ID_RE.match(media_id): | ||
# Media id is of the form <DATE><RANDOM_STRING> | ||
|
@@ -268,7 +298,7 @@ def url_cache_filepath_rel(self, media_id: str) -> str: | |
|
||
url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel) | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=False) | ||
def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]: | ||
"The dirs to try and remove if we delete the media_id file" | ||
if NEW_FORMAT_ID_RE.match(media_id): | ||
|
@@ -290,7 +320,7 @@ def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]: | |
), | ||
] | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=True) | ||
def url_cache_thumbnail_rel( | ||
self, media_id: str, width: int, height: int, content_type: str, method: str | ||
) -> str: | ||
|
@@ -318,7 +348,7 @@ def url_cache_thumbnail_rel( | |
|
||
url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=True) | ||
def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: | ||
# Media id is of the form <DATE><RANDOM_STRING> | ||
# E.g.: 2017-09-28-fsdRDt24DS234dsf | ||
|
@@ -341,7 +371,7 @@ def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: | |
url_cache_thumbnail_directory_rel | ||
) | ||
|
||
@_wrap_with_jail_check | ||
@_wrap_with_jail_check(relative=False) | ||
def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]: | ||
"The dirs to try and remove if we delete the media_id thumbnails" | ||
# Media id is of the form <DATE><RANDOM_STRING> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The bug was that
path
was an absolute, un-canonicalized path, sonormalized_path
was un-canonicalized whileself.real_base_path
was.