Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Retry dead servers a lot less often #340

Merged
merged 1 commit into from
Nov 5, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions synapse/http/matrixfederationclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

import simplejson as json
import logging
import random
import sys
import urllib
import urlparse
Expand All @@ -55,6 +56,9 @@
)


MAX_RETRIES = 4


class MatrixFederationEndpointFactory(object):
def __init__(self, hs):
self.tls_server_context_factory = hs.tls_server_context_factory
Expand Down Expand Up @@ -119,7 +123,7 @@ def _create_request(self, destination, method, path_bytes,

# XXX: Would be much nicer to retry only at the transaction-layer
# (once we have reliable transactions in place)
retries_left = 5
retries_left = MAX_RETRIES

http_url_bytes = urlparse.urlunparse(
("", "", path_bytes, param_bytes, query_bytes, "")
Expand Down Expand Up @@ -180,7 +184,9 @@ def send_request():
)

if retries_left and not timeout:
yield sleep(2 ** (5 - retries_left))
delay = 5 ** (MAX_RETRIES + 1 - retries_left)
delay *= random.uniform(0.8, 1.4)
yield sleep(delay)
retries_left -= 1
else:
raise
Expand Down
7 changes: 5 additions & 2 deletions synapse/util/retryutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from synapse.api.errors import CodeMessageException

import logging
import random


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -85,8 +86,9 @@ def get_retry_limiter(destination, clock, store, **kwargs):

class RetryDestinationLimiter(object):
def __init__(self, destination, clock, store, retry_interval,
min_retry_interval=5000, max_retry_interval=60 * 60 * 1000,
multiplier_retry_interval=2,):
min_retry_interval=10 * 60 * 1000,
max_retry_interval=24 * 60 * 60 * 1000,
multiplier_retry_interval=5,):
"""Marks the destination as "down" if an exception is thrown in the
context, except for CodeMessageException with code < 500.

Expand Down Expand Up @@ -140,6 +142,7 @@ def err(failure):
# We couldn't connect.
if self.retry_interval:
self.retry_interval *= self.multiplier_retry_interval
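# Apply the jitter to the interval and truncate the product; int() on the bare factor yields only 0 or 1.
self.retry_interval = int(self.retry_interval * random.uniform(0.8, 1.4))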

if self.retry_interval >= self.max_retry_interval:
self.retry_interval = self.max_retry_interval
Expand Down