Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Merge pull request #3138 from matrix-org/rav/catch_unhandled_exceptions
Browse files Browse the repository at this point in the history
Improve exception handling for background processes
  • Loading branch information
richvdh authored Apr 27, 2018
2 parents 53494c3 + 9255a6c commit 9c3da24
Show file tree
Hide file tree
Showing 20 changed files with 335 additions and 237 deletions.
15 changes: 10 additions & 5 deletions synapse/app/appservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@
from synapse.server import HomeServer
from synapse.storage.engines import create_engine
from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.logcontext import LoggingContext, preserve_fn
from synapse.util.logcontext import LoggingContext, run_in_background
from synapse.util.manhole import manhole
from synapse.util.versionstring import get_version_string
from twisted.internet import reactor
from twisted.internet import reactor, defer
from twisted.web.resource import NoResource

logger = logging.getLogger("synapse.app.appservice")
Expand Down Expand Up @@ -112,9 +112,14 @@ def on_rdata(self, stream_name, token, rows):

if stream_name == "events":
max_stream_id = self.store.get_room_max_stream_ordering()
preserve_fn(
self.appservice_handler.notify_interested_services
)(max_stream_id)
run_in_background(self._notify_app_services, max_stream_id)

@defer.inlineCallbacks
def _notify_app_services(self, room_stream_id):
try:
yield self.appservice_handler.notify_interested_services(room_stream_id)
except Exception:
logger.exception("Error notifying application services of event")


def start(config_options):
Expand Down
27 changes: 15 additions & 12 deletions synapse/app/federation_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,19 +237,22 @@ def process_replication_rows(self, stream_name, token, rows):

@defer.inlineCallbacks
def update_token(self, token):
self.federation_position = token

# We linearize here to ensure we don't have races updating the token
with (yield self._fed_position_linearizer.queue(None)):
if self._last_ack < self.federation_position:
yield self.store.update_federation_out_pos(
"federation", self.federation_position
)
try:
self.federation_position = token

# We linearize here to ensure we don't have races updating the token
with (yield self._fed_position_linearizer.queue(None)):
if self._last_ack < self.federation_position:
yield self.store.update_federation_out_pos(
"federation", self.federation_position
)

# We ACK this token over replication so that the master can drop
# its in memory queues
self.replication_client.send_federation_ack(self.federation_position)
self._last_ack = self.federation_position
# We ACK this token over replication so that the master can drop
# its in memory queues
self.replication_client.send_federation_ack(self.federation_position)
self._last_ack = self.federation_position
except Exception:
logger.exception("Error updating federation stream position")


if __name__ == '__main__':
Expand Down
31 changes: 17 additions & 14 deletions synapse/app/pusher.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,20 +144,23 @@ def on_rdata(self, stream_name, token, rows):

@defer.inlineCallbacks
def poke_pushers(self, stream_name, token, rows):
if stream_name == "pushers":
for row in rows:
if row.deleted:
yield self.stop_pusher(row.user_id, row.app_id, row.pushkey)
else:
yield self.start_pusher(row.user_id, row.app_id, row.pushkey)
elif stream_name == "events":
yield self.pusher_pool.on_new_notifications(
token, token,
)
elif stream_name == "receipts":
yield self.pusher_pool.on_new_receipts(
token, token, set(row.room_id for row in rows)
)
try:
if stream_name == "pushers":
for row in rows:
if row.deleted:
yield self.stop_pusher(row.user_id, row.app_id, row.pushkey)
else:
yield self.start_pusher(row.user_id, row.app_id, row.pushkey)
elif stream_name == "events":
yield self.pusher_pool.on_new_notifications(
token, token,
)
elif stream_name == "receipts":
yield self.pusher_pool.on_new_receipts(
token, token, set(row.room_id for row in rows)
)
except Exception:
logger.exception("Error poking pushers")

def stop_pusher(self, user_id, app_id, pushkey):
key = "%s:%s" % (app_id, pushkey)
Expand Down
95 changes: 49 additions & 46 deletions synapse/app/synchrotron.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,55 +340,58 @@ def get_currently_syncing_users(self):

@defer.inlineCallbacks
def process_and_notify(self, stream_name, token, rows):
if stream_name == "events":
# We shouldn't get multiple rows per token for events stream, so
# we don't need to optimise this for multiple rows.
for row in rows:
event = yield self.store.get_event(row.event_id)
extra_users = ()
if event.type == EventTypes.Member:
extra_users = (event.state_key,)
max_token = self.store.get_room_max_stream_ordering()
self.notifier.on_new_room_event(
event, token, max_token, extra_users
try:
if stream_name == "events":
# We shouldn't get multiple rows per token for events stream, so
# we don't need to optimise this for multiple rows.
for row in rows:
event = yield self.store.get_event(row.event_id)
extra_users = ()
if event.type == EventTypes.Member:
extra_users = (event.state_key,)
max_token = self.store.get_room_max_stream_ordering()
self.notifier.on_new_room_event(
event, token, max_token, extra_users
)
elif stream_name == "push_rules":
self.notifier.on_new_event(
"push_rules_key", token, users=[row.user_id for row in rows],
)
elif stream_name == "push_rules":
self.notifier.on_new_event(
"push_rules_key", token, users=[row.user_id for row in rows],
)
elif stream_name in ("account_data", "tag_account_data",):
self.notifier.on_new_event(
"account_data_key", token, users=[row.user_id for row in rows],
)
elif stream_name == "receipts":
self.notifier.on_new_event(
"receipt_key", token, rooms=[row.room_id for row in rows],
)
elif stream_name == "typing":
self.typing_handler.process_replication_rows(token, rows)
self.notifier.on_new_event(
"typing_key", token, rooms=[row.room_id for row in rows],
)
elif stream_name == "to_device":
entities = [row.entity for row in rows if row.entity.startswith("@")]
if entities:
elif stream_name in ("account_data", "tag_account_data",):
self.notifier.on_new_event(
"to_device_key", token, users=entities,
"account_data_key", token, users=[row.user_id for row in rows],
)
elif stream_name == "device_lists":
all_room_ids = set()
for row in rows:
room_ids = yield self.store.get_rooms_for_user(row.user_id)
all_room_ids.update(room_ids)
self.notifier.on_new_event(
"device_list_key", token, rooms=all_room_ids,
)
elif stream_name == "presence":
yield self.presence_handler.process_replication_rows(token, rows)
elif stream_name == "receipts":
self.notifier.on_new_event(
"groups_key", token, users=[row.user_id for row in rows],
)
elif stream_name == "receipts":
self.notifier.on_new_event(
"receipt_key", token, rooms=[row.room_id for row in rows],
)
elif stream_name == "typing":
self.typing_handler.process_replication_rows(token, rows)
self.notifier.on_new_event(
"typing_key", token, rooms=[row.room_id for row in rows],
)
elif stream_name == "to_device":
entities = [row.entity for row in rows if row.entity.startswith("@")]
if entities:
self.notifier.on_new_event(
"to_device_key", token, users=entities,
)
elif stream_name == "device_lists":
all_room_ids = set()
for row in rows:
room_ids = yield self.store.get_rooms_for_user(row.user_id)
all_room_ids.update(room_ids)
self.notifier.on_new_event(
"device_list_key", token, rooms=all_room_ids,
)
elif stream_name == "presence":
yield self.presence_handler.process_replication_rows(token, rows)
elif stream_name == "receipts":
self.notifier.on_new_event(
"groups_key", token, users=[row.user_id for row in rows],
)
except Exception:
logger.exception("Error processing replication")


def start(config_options):
Expand Down
13 changes: 10 additions & 3 deletions synapse/app/user_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@
from synapse.storage.user_directory import UserDirectoryStore
from synapse.util.caches.stream_change_cache import StreamChangeCache
from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.logcontext import LoggingContext, preserve_fn
from synapse.util.logcontext import LoggingContext, run_in_background
from synapse.util.manhole import manhole
from synapse.util.versionstring import get_version_string
from twisted.internet import reactor
from twisted.internet import reactor, defer
from twisted.web.resource import NoResource

logger = logging.getLogger("synapse.app.user_dir")
Expand Down Expand Up @@ -164,7 +164,14 @@ def on_rdata(self, stream_name, token, rows):
stream_name, token, rows
)
if stream_name == "current_state_deltas":
preserve_fn(self.user_directory.notify_new_event)()
run_in_background(self._notify_directory)

@defer.inlineCallbacks
def _notify_directory(self):
try:
yield self.user_directory.notify_new_event()
except Exception:
logger.exception("Error notifiying user directory of state update")


def start(config_options):
Expand Down
25 changes: 14 additions & 11 deletions synapse/appservice/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,17 +176,20 @@ def add_recoverers(self, recoverers):

@defer.inlineCallbacks
def _start_recoverer(self, service):
yield self.store.set_appservice_state(
service,
ApplicationServiceState.DOWN
)
logger.info(
"Application service falling behind. Starting recoverer. AS ID %s",
service.id
)
recoverer = self.recoverer_fn(service, self.on_recovered)
self.add_recoverers([recoverer])
recoverer.recover()
try:
yield self.store.set_appservice_state(
service,
ApplicationServiceState.DOWN
)
logger.info(
"Application service falling behind. Starting recoverer. AS ID %s",
service.id
)
recoverer = self.recoverer_fn(service, self.on_recovered)
self.add_recoverers([recoverer])
recoverer.recover()
except Exception:
logger.exception("Error starting AS recoverer")

@defer.inlineCallbacks
def _is_service_up(self, service):
Expand Down
93 changes: 48 additions & 45 deletions synapse/crypto/keyring.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,53 +146,56 @@ def _start_key_lookups(self, verify_requests):
verify_requests (List[VerifyKeyRequest]):
"""

# create a deferred for each server we're going to look up the keys
# for; we'll resolve them once we have completed our lookups.
# These will be passed into wait_for_previous_lookups to block
# any other lookups until we have finished.
# The deferreds are called with no logcontext.
server_to_deferred = {
rq.server_name: defer.Deferred()
for rq in verify_requests
}

# We want to wait for any previous lookups to complete before
# proceeding.
yield self.wait_for_previous_lookups(
[rq.server_name for rq in verify_requests],
server_to_deferred,
)

# Actually start fetching keys.
self._get_server_verify_keys(verify_requests)

# When we've finished fetching all the keys for a given server_name,
# resolve the deferred passed to `wait_for_previous_lookups` so that
# any lookups waiting will proceed.
#
# map from server name to a set of request ids
server_to_request_ids = {}

for verify_request in verify_requests:
server_name = verify_request.server_name
request_id = id(verify_request)
server_to_request_ids.setdefault(server_name, set()).add(request_id)

def remove_deferreds(res, verify_request):
server_name = verify_request.server_name
request_id = id(verify_request)
server_to_request_ids[server_name].discard(request_id)
if not server_to_request_ids[server_name]:
d = server_to_deferred.pop(server_name, None)
if d:
d.callback(None)
return res

for verify_request in verify_requests:
verify_request.deferred.addBoth(
remove_deferreds, verify_request,
try:
# create a deferred for each server we're going to look up the keys
# for; we'll resolve them once we have completed our lookups.
# These will be passed into wait_for_previous_lookups to block
# any other lookups until we have finished.
# The deferreds are called with no logcontext.
server_to_deferred = {
rq.server_name: defer.Deferred()
for rq in verify_requests
}

# We want to wait for any previous lookups to complete before
# proceeding.
yield self.wait_for_previous_lookups(
[rq.server_name for rq in verify_requests],
server_to_deferred,
)

# Actually start fetching keys.
self._get_server_verify_keys(verify_requests)

# When we've finished fetching all the keys for a given server_name,
# resolve the deferred passed to `wait_for_previous_lookups` so that
# any lookups waiting will proceed.
#
# map from server name to a set of request ids
server_to_request_ids = {}

for verify_request in verify_requests:
server_name = verify_request.server_name
request_id = id(verify_request)
server_to_request_ids.setdefault(server_name, set()).add(request_id)

def remove_deferreds(res, verify_request):
server_name = verify_request.server_name
request_id = id(verify_request)
server_to_request_ids[server_name].discard(request_id)
if not server_to_request_ids[server_name]:
d = server_to_deferred.pop(server_name, None)
if d:
d.callback(None)
return res

for verify_request in verify_requests:
verify_request.deferred.addBoth(
remove_deferreds, verify_request,
)
except Exception:
logger.exception("Error starting key lookups")

@defer.inlineCallbacks
def wait_for_previous_lookups(self, server_names, server_to_deferred):
"""Waits for any previous key lookups for the given servers to finish.
Expand Down
2 changes: 2 additions & 0 deletions synapse/federation/transaction_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,8 @@ def send_presence(self, states):
break

yield self._process_presence_inner(states_map.values())
except Exception:
logger.exception("Error sending presence states to servers")
finally:
self._processing_pending_presence = False

Expand Down
Loading

0 comments on commit 9c3da24

Please sign in to comment.