From 950b0ac8039630a89c3a0cc0d86f3808c57982cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Csord=C3=A1s?= Date: Wed, 27 Jan 2021 14:10:25 +0100 Subject: [PATCH] [server] Configure keepalive When using a CodeChecker server with `Docker Swarm` on the server side we may get a `[Errno 104] Connection reset by peer` exception when for example the storage takes more than 15 minutes. With the keepalive parameters we can solve this problem by not destroying the connection when an operation takes too much time. --- docs/web/server_config.md | 47 +++++++++++++++++++ web/server/codechecker_server/server.py | 44 +++++++++++++++++ .../codechecker_server/session_manager.py | 17 +++++++ web/server/config/server_config.json | 6 +++ 4 files changed, 114 insertions(+) diff --git a/docs/web/server_config.md b/docs/web/server_config.md index 040122c52a..5aed45383a 100644 --- a/docs/web/server_config.md +++ b/docs/web/server_config.md @@ -65,6 +65,53 @@ size of uploadable compilation database file in *bytes*. *Default value*: 104857600 bytes = 100 MB +### Keepalive +Linux has built-in support for keepalive. When using a CodeChecker server +with `Docker Swarm` it is recommended to use the following settings: +```json +{ + "keepalive": { + "enabled": true, + "idle": 600, + "interval": 30, + "max_probe": 10 + } +} +``` + +Otherwise you may get a `[Errno 104] Connection reset by peer` exception on the +server side and the client may hang forever. + +For more detailed information about these configuration options, see: +https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/usingkeepalive.html + +More information about this problem can be found here: +https://github.com/moby/moby/issues/31208#issuecomment-303905737 + +#### Idle time +The interval between the last data packet sent (simple ACKs are not considered +data) and the first keepalive probe. + +By default the server will use the value from your host configured by the +`net.ipv4.tcp_keepalive_time` parameter. 
This value can be overridden by the +`idle` key in the server configuration file. + +#### Interval time +The interval between subsequent keepalive probes, regardless of what the +connection has exchanged in the meantime. + +By default the server will use the value from your host configured by the +`net.ipv4.tcp_keepalive_intvl` parameter. This value can be overridden by the +`interval` key in the server configuration file. + +#### Probes +The number of unacknowledged probes to send before considering the connection +dead and notifying the application layer. + +By default the server will use the value from your host configured by the +`net.ipv4.tcp_keepalive_probes` parameter. This value can be overridden by the +`max_probe` key in the server configuration file. + ## Authentication For authentication configuration options and which options can be reloaded see the [Authentication](authentication.md) documentation. diff --git a/web/server/codechecker_server/server.py b/web/server/codechecker_server/server.py index b228ae9f9a..c1c2f47fdb 100644 --- a/web/server/codechecker_server/server.py +++ b/web/server/codechecker_server/server.py @@ -742,6 +742,9 @@ def __init__(self, bind_and_activate=True) ssl_key_file = os.path.join(config_directory, "key.pem") ssl_cert_file = os.path.join(config_directory, "cert.pem") + + self.configure_keepalive() + if os.path.isfile(ssl_key_file) and os.path.isfile(ssl_cert_file): LOG.info("Initiating SSL. Server listening on secure socket.") LOG.debug("Using cert file: %s", ssl_cert_file) @@ -759,6 +762,47 @@ def __init__(self, LOG.error("Couldn't start the server: %s", e.__str__()) raise + def configure_keepalive(self): + """ + Enable keepalive on the socket and some TCP keepalive configuration + option based on the server configuration file. 
+ """ + if not self.manager.is_keepalive_enabled(): + return + + keepalive_is_on = self.socket.getsockopt(socket.SOL_SOCKET, + socket.SO_KEEPALIVE) + if keepalive_is_on != 0: + LOG.debug('Socket keepalive already on.') + else: + LOG.debug('Socket keepalive off, turning on.') + + ret = self.socket.setsockopt(socket.SOL_SOCKET, + socket.SO_KEEPALIVE, 1) + if ret: + LOG.error('Failed to set socket keepalive: %s', ret) + + idle = self.manager.get_keepalive_idle() + if idle: + ret = self.socket.setsockopt(socket.IPPROTO_TCP, + socket.TCP_KEEPIDLE, idle) + if ret: + LOG.error('Failed to set TCP keepalive idle: %s', ret) + + interval = self.manager.get_keepalive_interval() + if interval: + ret = self.socket.setsockopt(socket.IPPROTO_TCP, + socket.TCP_KEEPINTVL, interval) + if ret: + LOG.error('Failed to set TCP keepalive interval: %s', ret) + + max_probe = self.manager.get_keepalive_max_probe() + if max_probe: + ret = self.socket.setsockopt(socket.IPPROTO_TCP, + socket.TCP_KEEPCNT, max_probe) + if ret: + LOG.error('Failed to set TCP max keepalive probe: %s', ret) + def terminate(self): """ Terminating the server. 
diff --git a/web/server/codechecker_server/session_manager.py b/web/server/codechecker_server/session_manager.py index 592d00c123..e3c133d471 100644 --- a/web/server/codechecker_server/session_manager.py +++ b/web/server/codechecker_server/session_manager.py @@ -180,6 +180,7 @@ def __init__(self, configuration_file, root_sha, force_auth=False): self.__worker_processes = get_worker_processes(scfg_dict) self.__max_run_count = scfg_dict.get('max_run_count', None) self.__store_config = scfg_dict.get('store', {}) + self.__keepalive_config = scfg_dict.get('keepalive', {}) self.__auth_config = scfg_dict['authentication'] if force_auth: @@ -657,6 +658,22 @@ def get_compilation_database_size(self): limit = self.__store_config.get('limit', {}) return limit.get('compilation_database_size') + def is_keepalive_enabled(self): + """ """ + return self.__keepalive_config.get('enabled', True) + + def get_keepalive_idle(self): + """ """ + return self.__keepalive_config.get('idle') + + def get_keepalive_interval(self): + """ """ + return self.__keepalive_config.get('interval') + + def get_keepalive_max_probe(self): + """ """ + return self.__keepalive_config.get('max_probe') + def __get_local_session_from_db(self, token): """ Creates a local session if a valid session token can be found in the diff --git a/web/server/config/server_config.json b/web/server/config/server_config.json index 364273e830..ab64d67817 100644 --- a/web/server/config/server_config.json +++ b/web/server/config/server_config.json @@ -8,6 +8,12 @@ "compilation_database_size": 104857600 } }, + "keepalive": { + "enabled": false, + "idle": 600, + "interval": 30, + "max_probe": 10 + }, "authentication": { "enabled" : false, "realm_name" : "CodeChecker Privileged server",