Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Allow specifying the value of Accept-Language header for URL previews #7265

Merged
merged 6 commits into from
Apr 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/7265.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a config option for specifying the value of the Accept-Language HTTP header when generating URL previews.
25 changes: 25 additions & 0 deletions docs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,31 @@ media_store_path: "DATADIR/media_store"
#
#max_spider_size: 10M

# A list of values for the Accept-Language HTTP header used when
# downloading webpages during URL preview generation. This allows
# Synapse to specify the preferred languages that URL previews should
# be in when communicating with remote servers.
#
# Each value is a IETF language tag; a 2-3 letter identifier for a
# language, optionally followed by subtags separated by '-', specifying
# a country or region variant.
#
# Multiple values can be provided, and a weight can be added to each by
# using quality value syntax (;q=). '*' translates to any language.
#
# Defaults to "en".
#
# Example:
#
# url_preview_accept_language:
# - en-UK
# - en-US;q=0.9
# - fr;q=0.8
# - *;q=0.7
#
url_preview_accept_language:
# - en


## Captcha ##
# See docs/CAPTCHA_SETUP for full details of configuring this.
Expand Down
29 changes: 29 additions & 0 deletions synapse/config/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ def read_config(self, config, **kwargs):

self.url_preview_url_blacklist = config.get("url_preview_url_blacklist", ())

self.url_preview_accept_language = config.get(
"url_preview_accept_language"
) or ["en"]

def generate_config_section(self, data_dir_path, **kwargs):
media_store = os.path.join(data_dir_path, "media_store")
uploads_path = os.path.join(data_dir_path, "uploads")
Expand Down Expand Up @@ -329,6 +333,31 @@ def generate_config_section(self, data_dir_path, **kwargs):
# The largest allowed URL preview spidering size in bytes
#
#max_spider_size: 10M

# A list of values for the Accept-Language HTTP header used when
# downloading webpages during URL preview generation. This allows
# Synapse to specify the preferred languages that URL previews should
# be in when communicating with remote servers.
#
# Each value is a IETF language tag; a 2-3 letter identifier for a
# language, optionally followed by subtags separated by '-', specifying
# a country or region variant.
#
# Multiple values can be provided, and a weight can be added to each by
# using quality value syntax (;q=). '*' translates to any language.
#
# Defaults to "en".
#
# Example:
#
# url_preview_accept_language:
# - en-UK
# - en-US;q=0.9
# - fr;q=0.8
# - *;q=0.7
#
url_preview_accept_language:
# - en
"""
% locals()
)
8 changes: 6 additions & 2 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def __init__(self, hs, media_repo, media_storage):
self.media_storage = media_storage

self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
self.url_preview_accept_language = hs.config.url_preview_accept_language

# memory cache mapping urls to an ObservableDeferred returning
# JSON-encoded OG metadata
Expand Down Expand Up @@ -315,9 +316,12 @@ async def _download_url(self, url, user):

with self.media_storage.store_into_file(file_info) as (f, fname, finish):
try:
logger.debug("Trying to get url '%s'", url)
logger.debug("Trying to get preview for url '%s'", url)
length, headers, uri, code = await self.client.get_file(
url, output_stream=f, max_size=self.max_spider_size
url,
output_stream=f,
max_size=self.max_spider_size,
headers={"Accept-Language": self.url_preview_accept_language},
)
except SynapseError:
# Pass SynapseErrors through directly, so that the servlet
Expand Down
55 changes: 55 additions & 0 deletions tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ def make_homeserver(self, reactor, clock):
)
config["url_preview_ip_range_whitelist"] = ("1.1.1.1",)
config["url_preview_url_blacklist"] = []
config["url_preview_accept_language"] = [
"en-UK",
"en-US;q=0.9",
"fr;q=0.8",
"*;q=0.7",
]

self.storage_path = self.mktemp()
self.media_store_path = self.mktemp()
Expand Down Expand Up @@ -507,3 +513,52 @@ def test_OPTIONS(self):
self.pump()
self.assertEqual(channel.code, 200)
self.assertEqual(channel.json_body, {})

def test_accept_language_config_option(self):
"""
Accept-Language header is sent to the remote server
"""
self.lookups["example.com"] = [(IPv4Address, "8.8.8.8")]

# Build and make a request to the server
request, channel = self.make_request(
"GET", "url_preview?url=http://example.com", shorthand=False
)
request.render(self.preview_url)
self.pump()

# Extract Synapse's tcp client
client = self.reactor.tcpClients[0][2].buildProtocol(None)

# Build a fake remote server to reply with
server = AccumulatingProtocol()

# Connect the two together
server.makeConnection(FakeTransport(client, self.reactor))
client.makeConnection(FakeTransport(server, self.reactor))

# Tell Synapse that it has received some data from the remote server
client.dataReceived(
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
% (len(self.end_content),)
+ self.end_content
)

# Move the reactor along until we get a response on our original channel
self.pump()
self.assertEqual(channel.code, 200)
self.assertEqual(
channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
)

# Check that the server received the Accept-Language header as part
# of the request from Synapse
self.assertIn(
(
b"Accept-Language: en-UK\r\n"
b"Accept-Language: en-US;q=0.9\r\n"
b"Accept-Language: fr;q=0.8\r\n"
b"Accept-Language: *;q=0.7"
),
server.data,
)