Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Allow configuring maximum interval between AS transaction retries #12685

Closed
wants to merge 11 commits into from
27 changes: 24 additions & 3 deletions synapse/appservice/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
Optional,
Set,
Tuple,
Union,
)

from synapse.appservice import (
Expand All @@ -68,6 +69,7 @@
TransactionUnusedFallbackKeys,
)
from synapse.appservice.api import ApplicationServiceApi
from synapse.config.appservice import AppServiceConfig
from synapse.events import EventBase
from synapse.logging.context import run_in_background
from synapse.metrics.background_process_metrics import run_as_background_process
Expand Down Expand Up @@ -102,7 +104,9 @@ def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastores().main
self.as_api = hs.get_application_service_api()

self.txn_ctrl = _TransactionController(self.clock, self.store, self.as_api)
self.txn_ctrl = _TransactionController(
self.clock, self.store, self.as_api, hs.config.appservice
)
self.queuer = _ServiceQueuer(self.txn_ctrl, self.clock, hs)

async def start(self) -> None:
Expand Down Expand Up @@ -347,10 +351,17 @@ class _TransactionController:
(Note we have only have one of these in the homeserver.)
"""

def __init__(self, clock: Clock, store: DataStore, as_api: ApplicationServiceApi):
def __init__(
self,
clock: Clock,
store: DataStore,
as_api: ApplicationServiceApi,
as_config: AppServiceConfig,
):
self.clock = clock
self.store = store
self.as_api = as_api
self.as_config = as_config

# map from service id to recoverer instance
self.recoverers: Dict[str, "_Recoverer"] = {}
Expand Down Expand Up @@ -430,7 +441,12 @@ def start_recoverer(self, service: ApplicationService) -> None:
logger.info("Starting recoverer for AS ID %s", service.id)
assert service.id not in self.recoverers
recoverer = self.RECOVERER_CLASS(
self.clock, self.store, self.as_api, service, self.on_recovered
self.clock,
self.store,
self.as_api,
service,
self.on_recovered,
self.as_config.appservice_max_backoff,
)
self.recoverers[service.id] = recoverer
recoverer.recover()
Expand All @@ -452,6 +468,7 @@ class _Recoverer:
as_api (synapse.appservice.api.ApplicationServiceApi):
service (synapse.appservice.ApplicationService): the service we are managing
callback (callable[_Recoverer]): called once the service recovers.
max_backoff (int|None): maximum interval between retries
"""

def __init__(
Expand All @@ -461,13 +478,15 @@ def __init__(
as_api: ApplicationServiceApi,
service: ApplicationService,
callback: Callable[["_Recoverer"], Awaitable[None]],
max_backoff: Union[int, None],
tadzik marked this conversation as resolved.
Show resolved Hide resolved
):
self.clock = clock
self.store = store
self.as_api = as_api
self.service = service
self.callback = callback
self.backoff_counter = 1
self.max_backoff = max_backoff

def recover(self) -> None:
def _retry() -> None:
Expand All @@ -476,6 +495,8 @@ def _retry() -> None:
)

delay = 2**self.backoff_counter
if self.max_backoff is not None and delay > self.max_backoff:
delay = self.max_backoff
logger.info("Scheduling retries on %s in %fs", self.service.id, delay)
self.clock.call_later(delay, _retry)

Expand Down
7 changes: 7 additions & 0 deletions synapse/config/appservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class AppServiceConfig(Config):
def read_config(self, config: JsonDict, **kwargs: Any) -> None:
self.app_service_config_files = config.get("app_service_config_files", [])
self.track_appservice_user_ips = config.get("track_appservice_user_ips", False)
self.appservice_max_backoff = config.get("appservice_max_backoff", None)

def generate_config_section(cls, **kwargs: Any) -> str:
return """\
Expand All @@ -47,6 +48,12 @@ def generate_config_section(cls, **kwargs: Any) -> str:
# enables MAU tracking for application service users.
#
#track_appservice_user_ips: true

# Set to establish maximum backoff (in seconds) between HS -> AS connection attempts.
# Upon failing to push appservice events the homeserver will wait
# an increasing amount of seconds between retries. This sets an upper limit of that (in seconds)
tadzik marked this conversation as resolved.
Show resolved Hide resolved
#
#appservice_max_backoff: 60
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explicitly say what the default value/behaviour is, if this config key is not provided?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done now in 727bbf8

"""


Expand Down