From 103aa8636f02c8d557d8eb2ce0cee4cecf92d243 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Sep 2022 12:58:46 -0500 Subject: [PATCH 01/10] Be able to correlate timeouts in reverse-proxy layer in front of Synapse (pull request ID from header) Fix https://github.com/matrix-org/synapse/issues/13685 --- docs/usage/configuration/config_documentation.md | 6 ++++++ synapse/config/server.py | 4 +++- synapse/http/site.py | 11 ++++++++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index cd546041b2d4..08356d79c1cf 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -436,6 +436,12 @@ Sub-options for each listener include: * `x_forwarded`: Only valid for an 'http' listener. Set to true to use the X-Forwarded-For header as the client IP. Useful when Synapse is behind a reverse-proxy. +* `request_id_header`: The header used as the basis for the request ID which + is used in logs and tracing to correlate requests. Useful when Synapse is + behind a reverse-proxy. For example, if you use Cloudflare in front of + Synapse, you can set this to `"cf-ray"` to match up requests even when the + Cloudflare layer times out before Synapse is done. + * `resources`: Only valid for an 'http' listener. A list of resources to host on this port. Sub-options for each resource are: diff --git a/synapse/config/server.py b/synapse/config/server.py index c91df636d9a7..813ea91e0ac6 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -206,6 +206,7 @@ class HttpListenerConfig: resources: List[HttpResourceConfig] = attr.Factory(list) additional_resources: Dict[str, dict] = attr.Factory(dict) tag: Optional[str] = None + request_id_header: Optional[str] = None @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -887,7 +888,7 @@ def read_gc_thresholds( ) -def parse_listener_def(num: int, listener: Any) -> ListenerConfig: +def parse_listener_def(num: int, listener: JsonDict) -> ListenerConfig: """parse a listener config from the config file""" listener_type = listener["type"] # Raise a helpful error if direct TCP replication is still configured. @@ -928,6 +929,7 @@ def parse_listener_def(num: int, listener: Any) -> ListenerConfig: resources=resources, additional_resources=listener.get("additional_resources", {}), tag=listener.get("tag"), + request_id_header=listener.get("request_id_header"), ) return ListenerConfig(port, bind_addresses, listener_type, tls, http_config) diff --git a/synapse/http/site.py b/synapse/http/site.py index 1155f3f61032..6014867685a9 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -72,10 +72,12 @@ def __init__( site: "SynapseSite", *args: Any, max_request_body_size: int = 1024, + request_id_header: Optional[str], **kw: Any, ): super().__init__(channel, *args, **kw) self._max_request_body_size = max_request_body_size + self.request_id_header = request_id_header self.synapse_site = site self.reactor = site.reactor self._channel = channel # this is used by the tests @@ -172,7 +174,13 @@ def set_opentracing_span(self, span: "opentracing.Span") -> None: self._opentracing_span = span def get_request_id(self) -> str: - return "%s-%i" % (self.get_method(), self.request_seq) + if self.request_id_header: + request_id_value = self.getHeader(self.request_id_header) + + if not request_id_value: + request_id_value = self.request_seq + + return "%s-%i" % (self.get_method(), request_id_value) def get_redacted_uri(self) -> str: """Gets the redacted URI associated with the request (or placeholder if the URI @@ -617,6 +625,7 @@ def request_factory(channel: HTTPChannel, queued: bool) -> Request: self, max_request_body_size=max_request_body_size, queued=queued, + request_id_header=config.http_options.request_id_header, ) self.requestFactory = request_factory # type: ignore From 41d5244686af940484d8754c76bc308ff3bd79e3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Sep 2022 13:24:01 -0500 Subject: [PATCH 02/10] Tidy up --- docs/usage/configuration/config_documentation.md | 12 +++++++----- synapse/http/site.py | 10 ++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 08356d79c1cf..f22ec1461fa0 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -436,11 +436,13 @@ Sub-options for each listener include: * `x_forwarded`: Only valid for an 'http' listener. Set to true to use the X-Forwarded-For header as the client IP. Useful when Synapse is behind a reverse-proxy. -* `request_id_header`: The header used as the basis for the request ID which - is used in logs and tracing to correlate requests. Useful when Synapse is - behind a reverse-proxy. For example, if you use Cloudflare in front of - Synapse, you can set this to `"cf-ray"` to match up requests even when the - Cloudflare layer times out before Synapse is done. +* `request_id_header`: The header extracted from each incoming request that is + used as the basis for the request ID which is used in logs and tracing to + correlate and match up requests. When unset, Synapse will generate automatic + sequential request IDs. This option is useful when Synapse is behind a + reverse-proxy. For example, if you use Cloudflare in front of Synapse, you + can set this as `"cf-ray"` to match up requests even when the Cloudflare + layer times out before Synapse is done. * `resources`: Only valid for an 'http' listener. A list of resources to host on this port. Sub-options for each resource are: diff --git a/synapse/http/site.py b/synapse/http/site.py index 6014867685a9..ae9ff2559c2e 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -72,7 +72,7 @@ def __init__( site: "SynapseSite", *args: Any, max_request_body_size: int = 1024, - request_id_header: Optional[str], + request_id_header: Optional[str] = None, **kw: Any, ): super().__init__(channel, *args, **kw) @@ -178,9 +178,9 @@ def get_request_id(self) -> str: request_id_value = self.getHeader(self.request_id_header) if not request_id_value: - request_id_value = self.request_seq + request_id_value = str(self.request_seq) - return "%s-%i" % (self.get_method(), request_id_value) + return "%s-%s" % (self.get_method(), request_id_value) def get_redacted_uri(self) -> str: """Gets the redacted URI associated with the request (or placeholder if the URI @@ -619,13 +619,15 @@ def __init__( proxied = config.http_options.x_forwarded request_class = XForwardedForRequest if proxied else SynapseRequest + request_id_header = config.http_options.request_id_header + def request_factory(channel: HTTPChannel, queued: bool) -> Request: return request_class( channel, self, max_request_body_size=max_request_body_size, queued=queued, - request_id_header=config.http_options.request_id_header, + request_id_header=request_id_header, ) self.requestFactory = request_factory # type: ignore From b44889495de440a07029097bb5b5b35301167163 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Sep 2022 13:26:03 -0500 Subject: [PATCH 03/10] Add changelog --- changelog.d/13801.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13801.feature diff --git a/changelog.d/13801.feature b/changelog.d/13801.feature new file mode 100644 index 000000000000..d86608f5ca2a --- /dev/null +++ b/changelog.d/13801.feature @@ -0,0 +1 @@ +Add `listeners[x].request_id_header` config to specify which request header to extract and use as the request ID in order to correlate timeouts in reverse-proxy layers in front of Synapse when tracing. From 51cb36391899a79b155c6a00bf04266bca96df91 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Sep 2022 13:30:12 -0500 Subject: [PATCH 04/10] More clear docs --- docs/usage/configuration/config_documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index f22ec1461fa0..ff8c8761ed19 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -437,7 +437,7 @@ Sub-options for each listener include: behind a reverse-proxy. * `request_id_header`: The header extracted from each incoming request that is - used as the basis for the request ID which is used in logs and tracing to + used as the basis for the request ID. The request ID used in logs and tracing to correlate and match up requests. When unset, Synapse will generate automatic sequential request IDs. This option is useful when Synapse is behind a reverse-proxy. For example, if you use Cloudflare in front of Synapse, you From f6fb0c8e4f209eec02f4855548c0592a47f8b011 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Sep 2022 13:59:29 -0500 Subject: [PATCH 05/10] Fix variable reference --- synapse/http/site.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/http/site.py b/synapse/http/site.py index ae9ff2559c2e..55a6afce3552 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -174,10 +174,11 @@ def set_opentracing_span(self, span: "opentracing.Span") -> None: self._opentracing_span = span def get_request_id(self) -> str: + request_id_value = None if self.request_id_header: request_id_value = self.getHeader(self.request_id_header) - if not request_id_value: + if request_id_value is None: request_id_value = str(self.request_seq) return "%s-%s" % (self.get_method(), request_id_value) From 2d093241b2999152256399090f9d4ebc6ff3099d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 20:38:50 -0500 Subject: [PATCH 06/10] Update docs from review --- docs/reverse_proxy.md | 4 ++++ docs/usage/configuration/config_documentation.md | 14 +++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md index d1618e815526..4e7a1d443533 100644 --- a/docs/reverse_proxy.md +++ b/docs/reverse_proxy.md @@ -45,6 +45,10 @@ listens to traffic on localhost. (Do not change `bind_addresses` to `127.0.0.1` when using a containerized Synapse, as that will prevent it from responding to proxied traffic.) +Optionally, you can also set +[`request_id_header`](../usage/configuration/config_documentation.md#listeners) +so that the server extracts and re-uses the same request ID format that the +reverse proxy is using. ## Reverse-proxy configuration examples diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index ff8c8761ed19..485b035e65da 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -434,15 +434,15 @@ Sub-options for each listener include: * `tls`: set to true to enable TLS for this listener. Will use the TLS key/cert specified in tls_private_key_path / tls_certificate_path. * `x_forwarded`: Only valid for an 'http' listener. Set to true to use the X-Forwarded-For header as the client IP. Useful when Synapse is - behind a reverse-proxy. + behind a [reverse-proxy](../../reverse_proxy.md). * `request_id_header`: The header extracted from each incoming request that is - used as the basis for the request ID. The request ID used in logs and tracing to - correlate and match up requests. When unset, Synapse will generate automatic - sequential request IDs. This option is useful when Synapse is behind a - reverse-proxy. For example, if you use Cloudflare in front of Synapse, you - can set this as `"cf-ray"` to match up requests even when the Cloudflare - layer times out before Synapse is done. + used as the basis for the request ID. The request ID is used in logs and + tracing to correlate and match up requests. When unset, Synapse will + automatically generate sequential request IDs. This option is useful when + Synapse is behind a [reverse-proxy](../../reverse_proxy.md). + + _Added in Synapse 1.68.0._ * `resources`: Only valid for an 'http' listener. A list of resources to host on this port. Sub-options for each resource are: From b1f527f794f3de7cdaa3a3a806f7ab4515136d30 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 20:45:22 -0500 Subject: [PATCH 07/10] Validate that listener is a dict See https://github.com/matrix-org/synapse/pull/13801#discussion_r971409823 --- synapse/config/server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/config/server.py b/synapse/config/server.py index 813ea91e0ac6..777d8972d603 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -888,8 +888,11 @@ def read_gc_thresholds( ) -def parse_listener_def(num: int, listener: JsonDict) -> ListenerConfig: +def parse_listener_def(num: int, listener: Any) -> ListenerConfig: """parse a listener config from the config file""" + if not isinstance(listener, dict): + raise ConfigError(f"listeners[{num}] must be a dictionary") + listener_type = listener["type"] # Raise a helpful error if direct TCP replication is still configured. if listener_type == "replication": From f851f58c4cd96e36b59716bb65f0dfaf3fbef4a7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 20:47:40 -0500 Subject: [PATCH 08/10] Generalize changelog See https://github.com/matrix-org/synapse/pull/13801#discussion_r971404100 --- changelog.d/13801.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/13801.feature b/changelog.d/13801.feature index d86608f5ca2a..d7cedfd3027c 100644 --- a/changelog.d/13801.feature +++ b/changelog.d/13801.feature @@ -1 +1 @@ -Add `listeners[x].request_id_header` config to specify which request header to extract and use as the request ID in order to correlate timeouts in reverse-proxy layers in front of Synapse when tracing. +Add `listeners[x].request_id_header` config to specify which request header to extract and use as the request ID in order to correlate requests from a reverse-proxy. From 3d73210f5a100d2767f173535a59402b5c4936a2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 20:53:33 -0500 Subject: [PATCH 09/10] More robust validation --- synapse/config/server.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/synapse/config/server.py b/synapse/config/server.py index 777d8972d603..f2353ce5fb0a 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -521,9 +521,11 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: ): raise ConfigError("allowed_avatar_mimetypes must be a list") - self.listeners = [ - parse_listener_def(i, x) for i, x in enumerate(config.get("listeners", [])) - ] + listeners = config.get("listeners", []) + if not isinstance(listeners, list): + raise ConfigError("Expected a list", ("listeners",)) + + self.listeners = [parse_listener_def(i, x) for i, x in enumerate(listeners)] # no_tls is not really supported any more, but let's grandfather it in # here. @@ -891,7 +893,7 @@ def read_gc_thresholds( def parse_listener_def(num: int, listener: Any) -> ListenerConfig: """parse a listener config from the config file""" if not isinstance(listener, dict): - raise ConfigError(f"listeners[{num}] must be a dictionary") + raise ConfigError("Expected a dictionary", ("listeners", str(num))) listener_type = listener["type"] # Raise a helpful error if direct TCP replication is still configured. From 5e79b022ae2053c3be8036535df6fe10bf4db882 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 15 Sep 2022 14:48:28 -0500 Subject: [PATCH 10/10] Add cross-link from config docs to request log format See https://github.com/matrix-org/synapse/pull/13801#discussion_r971964885 --- docs/usage/configuration/config_documentation.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 485b035e65da..69d305b62e6d 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -437,10 +437,11 @@ Sub-options for each listener include: behind a [reverse-proxy](../../reverse_proxy.md). * `request_id_header`: The header extracted from each incoming request that is - used as the basis for the request ID. The request ID is used in logs and - tracing to correlate and match up requests. When unset, Synapse will - automatically generate sequential request IDs. This option is useful when - Synapse is behind a [reverse-proxy](../../reverse_proxy.md). + used as the basis for the request ID. The request ID is used in + [logs](../administration/request_log.md#request-log-format) and tracing to + correlate and match up requests. When unset, Synapse will automatically + generate sequential request IDs. This option is useful when Synapse is behind + a [reverse-proxy](../../reverse_proxy.md). _Added in Synapse 1.68.0._