diff --git a/.circleci/config.yml b/.circleci/config.yml index 2b3947cd9..19a6aeb63 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -197,6 +197,7 @@ jobs: POSTGRES_USER: root POSTGRES_DB: circle_test POSTGRES_HOST_AUTH_METHOD: trust + command: postgres -c track_commit_timestamp=true - image: cimg/redis:<< parameters.redis_version >> # - image: cimg/rust:1.65 @@ -233,6 +234,7 @@ jobs: POSTGRES_USER: root POSTGRES_DB: circle_test POSTGRES_HOST_AUTH_METHOD: trust + command: postgres -c track_commit_timestamp=true - image: cimg/redis:<< parameters.redis_version >> # - image: cimg/rust:1.65 @@ -271,6 +273,7 @@ jobs: POSTGRES_USER: root POSTGRES_DB: circle_test POSTGRES_HOST_AUTH_METHOD: trust + command: postgres -c track_commit_timestamp=true - image: cimg/redis:<< parameters.redis_version >> # - image: cimg/rust:1.65 @@ -310,6 +313,7 @@ jobs: POSTGRES_USER: root POSTGRES_DB: circle_test POSTGRES_HOST_AUTH_METHOD: trust + command: postgres -c track_commit_timestamp=true - image: cimg/redis:<< parameters.redis_version >> # - image: cimg/rust:1.65 diff --git a/docs/admins/availability-and-migration.rst b/docs/admins/availability-and-migration.rst index 9a50aaffd..7e49de73b 100644 --- a/docs/admins/availability-and-migration.rst +++ b/docs/admins/availability-and-migration.rst @@ -30,6 +30,8 @@ IRRd instances in a number of cases: * You are using one IRRd instance as the active instance, and would like to have a second on standby to promote to the active instance with the most recent data. +* You have a large volume of queries and want to distribute load over + multiple instances. This document mainly discusses three kinds of IRRd instances: @@ -47,19 +49,190 @@ This document mainly discusses three kinds of IRRd instances: that processes authoritative changes, and is the single source of truth, at one point in time. IRRd does not support having multiple active instances. 
-This document suggest three different approaches for configuring this, -each with their own upsides and downsides. +.. warning:: + Previous versions of IRRd and this documentation suggested standby servers + with NRTM as an option. This option is strongly recommended against, due to + incompatibility with :doc:`object suppression ` + along with other issues regarding mirror synchronisation. + The ``sources.{name}.export_destination_unfiltered`` and + ``sources.{name}.export_destination`` settings are deprecated. + + +Using PostgreSQL replication for standby and query-only instances +----------------------------------------------------------------- +The best option to run either a standby or a query-only instance is using +PostgreSQL replication. All persistent IRRD data is stored in the +PostgreSQL database, and will therefore be included. +PostgreSQL replication will also ensure all journal entries and +serials remain the same after a switch. +:doc:`Suppressed objects `, e.g. by RPKI +validation, and suspended objects, +are correctly included in the replication as well. + +There are several important requirements for this setup: + +* The standby must run a PostgreSQL streaming replication from the + active instance. Logical replication is not supported. +* The PostgreSQL configuration must have ``track_commit_timestamp`` + and ``hot_standby_feedback`` enabled. +* On the standby, you run the IRRD instance with the ``readonly_standby`` + parameter set. +* The standby instance must use its own Redis instance. Do not use + Redis replication. +* ``rpki.roa_source`` must be consistent between active and standby + configurations. +* You are recommended to keep other settings, like ``scopefilter``, + ``sources.{name}.route_object_preference``, + ``sources.{name}.object_class_filter`` consistent between active + and standby.
Note that you can not set + ``sources.{name}.authoritative``, ``sources.{name}.nrtm_host``, or + ``sources.{name}.import_source`` on a standby instance, as these + conflict with ``readonly_standby``. +* It is recommended that all PostgreSQL instances only host the IRRd + database. Streaming replication will always include all databases, + and commits received on the standby in any database will trigger + a local preloaded data refresh. +* Although the details of PostgreSQL are out of scope for + this documentation, the use of replication slots is recommended. + Make sure to drop a replication slot if you decommission a + standby server, to prevent infinite PostgreSQL WAL growth. + +As replication replicates the entire database, any IRR registries +mirrored on the active instance, are also mirrored on the standby, +through the PostgreSQL replication process. + +Consistency in object suppression settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you query IRRD's configuration on a standby, e.g. with the ``!J`` +query, it will reflect the local configuration regarding +:doc:`object suppression settings `. +However, the standby does not use these settings: its database is +read only, and instead the suppression is applied by the active +instance and then replicated. + +For consistency in this query output, and reduced risk of configuration +inconsistencies after promoting a standby, you are encouraged to keep +the object suppression settings identical on all instances, even +if some are (currently) not used. + +For RPKI, ``rpki.roa_source`` must be consistent between active and +standby, because that setting determines whether the query parser +considers ``RPKI`` a valid source. + +Promoting a standby instance to active +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The general plan for promoting an IRRDv4 instance is: + +* Hold all update emails. +* Ensure PostgreSQL replication is up to date. +* Promote the PostgreSQL replica to become a main server. 
+* Disable the ``readonly_standby`` setting in IRRd. +* Make sure your IRRD configuration on the standby is up to date + compared to the old active (ideally, manage this continuously). + Make sure the ``authoritative`` setting is enabled on your authoritative + source, and mirroring settings for any mirrored sources, e.g. + ``nrtm_host`` are correct. +* Start the IRRd instance. +* Redirect queries to the new instance. +* Run the ``irrd_load_pgp_keys`` command to load all PGP keys from + authoritative sources into the local keychain, allowing them to be used + for authentication. +* Redirect update emails to the new instance. +* Ensure published exports are now taken from the new instance. +* Check the mirroring status to ensure the new active instance + has access to all exports and NRTM streams (some other operators + restrict NRTM access to certain IPs). + +.. warning:: + If users use IRRD internal authentication, by logging in through + the web interface, ensure you have a consistent URL, i.e. + direct to the current active instance by DNS records. WebAuthn + tokens are tied to the URL as seen by the browser, and will + become unusable if you change the URL. + +Upgrading IRRD +~~~~~~~~~~~~~~ +When upgrading your IRRD instances, first upgrade the active instance, +then the standby instances. If you need to run ``irrd_database_upgrade`` +as part of the upgrade, only do so on the active instance. PostgreSQL +replication will include the schema changes and update standby +databases. + +.. note:: + During the time between the database upgrade and upgrading the IRRD + version on a standby instance, queries on the standby instance may fail. + This depends on the exact changes between versions. + +You are encouraged to always test upgrades yourself before applying them +in production. 
+ +Preload data refresh on standby instances +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There is one inefficiency in the replication process: like an active +instance, a standby instance will keep certain data in memory and/or +Redis for performance reasons. This data needs to be refreshed if +certain data changes in the SQL database. + +On an active instance, the preloaded data is refreshed only when +relevant RPSL objects have changed. On a replica, this information +is not available. Therefore, standby instances refresh this data +after any change to the SQL database. Therefore, you may see more +load on the preload process than is typical on an active instance. +Refreshes are batched, so only a single one will run at a time. + +Due to small differences in the timing of the preload process, +there may be an additional delay in updating responses to some +queries on the standby compared to the active instance, in the +order of 15-60 seconds. +This concerns the whois queries ``!g``, ``!6``, ``!a`` and in some cases ``!i``, +and the GraphQL queries ``asnPrefixes`` and ``asSetPrefixes``. + + +Query-only instances using NRTM +------------------------------- +If you want to distribute the query load, but will never promote the +secondaries to active instances, you can use the PostgreSQL replication +method described above, or NRTM mirroring. +Consider carefully whether you really only need a query-only +instance, or may need to use it as a standby instance later. Promoting +an NRTM query-only instance to an active instance is unsupported. + +When others mirror from your instance using NRTM, you need to be aware +of serial synchronisation. There are two options: + +* Direct all NRTM queries to your active instance. Publish the RPSL export + and CURRENTSERIAL file from that instance. +* Use synchronised serials, allowing NRTM queries to be sent to any query-only + instance. Publish the RPSL export and CURRENTSERIAL file from the active + instance. 
+ +For further details, see the +:ref:`NRTM serial handling documentation `. + +.. warning:: + When **not** using synchronised serials, NRTM users must never be switched + (e.g. by DNS changes or load balancers) to different instances, without + reloading their local copy. Otherwise they may silently lose updates. + + +Loading from a PostgreSQL backup +-------------------------------- +You can initialise an IRRD instance from a database backup, either as +part of a recovery or a planned migration. Key steps: + +* If the backup was made with an older IRRD version, run + ``irrd_database_upgrade`` to upgrade the schema. +* Run the ``irrd_load_pgp_keys`` command to load all PGP keys from + authoritative sources into the local keychain, allowing them to be used + for authentication. -Option 1: using exports and NRTM for migrations and standby instances ---------------------------------------------------------------------- -The first option is to use the same :doc:`mirroring ` -features as any other kind of IRR data mirroring. This means using the files -placed in ``sources.{name}.export_destination`` by the active instance -as the ``sources.{name}.import_source`` for the standby instances, -and having standby's follow the active NRTM stream. -If you are migrating from a legacy version of IRRd, this is most likely your -only option. +Migration from legacy IRRD +-------------------------- +To migrate from a legacy IRRD version, you can use the same +:doc:`mirroring ` features as any other kind of IRR +data mirroring. In addition to usual mirroring, you should enable +``strict_import_keycert_objects`` for the source. This is a bit different from "regular" mirroring, where the mirror is never meant to be promoted to an active instance, and instances may be run by entirely @@ -78,7 +251,6 @@ regular mirror for other registries. multiple objects in your file with the same key, IRRd will only import the last one. 
- Object validation ~~~~~~~~~~~~~~~~~ Mirrored sources use @@ -97,105 +269,17 @@ even in non-strict mode. These objects are logged. **While running IRRd 4 as a mirror, you should check the logs for any such objects - they will disappear when you make IRRd 4 your authoritative instance.** -GPG keychain imports -~~~~~~~~~~~~~~~~~~~~ -In short: standby instances should have ``strict_import_keycert_objects`` -enabled. - -IRRd uses GnuPG to validate PGP signatures used to authenticate authoritative -changes. This means that all `key-cert` objects need to be inserted into the -GnuPG keychain before users can submit PGP signed updates. - -By default, IRRd only inserts public PGP keys from `key-cert` objects for -authoritative sources - as there is no reason to do PGP signature validation -for non-authoritative sources. However, a standby source needs to have these -keys imported already to become active later. This can be enabled with the -``strict_import_keycert_objects`` setting on the mirror configuration. -When enabled, `key-cert` objects always use the strict importer which includes -importing into the key chain, which allows them to be used for authentication -in the future. - -If your IRRd instance already has (or may have) `key-cert` objects that were -imported without ``strict_import_keycert_objects``, you can insert them into the -local keychain with the ``irrd_load_pgp_keys`` command. - -The ``irrd_load_pgp_keys`` command may fail to import certain keys if they use -an unsupported format. It is safe to run multiple times, even if some or all -keys are already in the keychain, and safe to run while IRRd is running. - -Password hashes -~~~~~~~~~~~~~~~ -Password authentication depends on password hashes in `mntner` objects. -To improve security, these password hashes are not included in exports or -NRTM streams for regular mirrors in IRRDv4. 
- -However, when an IRRd mirror is a standby -instance that may need to take an active role later, it needs all password -hashes. To support this, you need to configure a special mirroring process -on the current active instance: - -* Set ``sources.{name}.export_destination_unfiltered`` to a path where IRRd - will store exports that include full password hashes. Other than including - full hashes, this works the same as ``sources.{name}.export_destination``. - Then, distribute those files to your standby instance, and point - ``import_source`` to their location. -* Set ``sources.{name}.nrtm_access_list_unfiltered`` to an access list defined - in the configuration file. Any IP on this access list will receive - full password hashes when doing NRTM requests. Other than that, NRTM works - identical to filtered queries. Set this to the IPs of your standby instances. - -On the standby instance, you do not need any specific configuration. -However, if you used previously imported `mntner` objects without full hashes -on the standby, you need to do a full reload of the data on the standby to -ensure it has full hashes for all objects. - -If you are migrating from a different IRR server, check that password -hashes are not filtered. - Serials ~~~~~~~ -Each IRRd instance potentially creates its own set of NRTM serials when +Each instance potentially creates its own set of NRTM serials when importing changes over NRTM. This means that when switching to a different instance, mirrors would have to refresh their data. -IRRd can run a mirror in synchronised serial mode. This is used by some -deployments to spread their query load over multiple read-only instances. -For further details, see the -:ref:`NRTM serial handling documentation `. - -.. warning:: - When not using synchronised serials, NRTM users must never be switched - (e.g. by DNS changes or load balancers) to different instances, without - reloading their local copy. Otherwise they may silently lose updates. 
- - Without synchronised serials, the RPSL export, CURRENTSERIAL file, and NRTM - feed used by a mirror must all come from the same source instance. - -RPKI and scope filter -~~~~~~~~~~~~~~~~~~~~~ -:doc:`RPKI-aware mode ` and the -:doc:`scope filter ` make invalid or out of scope -objects invisible locally. These are not included in any exports, and if -an existing object becomes invalid or out of scope, a deletion is added -to the NRTM journal. - -IRRd retains invalid or out of scope objects, and they may become visible again -if their status is changed by a configuration or ROA change. -However, a standby or query-only instance using exports and NRTM will never see -objects that are invalid or out of scope on the active instance, as they are -not included in mirroring. -Upon promoting a standby instance to an active instance, these -objects are lost permanently. - -For the same reasons, standby and query-only instances that receive their -data over NRTM can not be queried for RPKI invalid or out of scope objects, -as they never see these objects. - -Promoting a standby to the active instance -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you use IRR mirroring with exports and NRTM, the general plan for promoting -an IRRDv4 instance would be: +Promoting a IRRD mirror of a legacy instance to active +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you use IRR mirroring with exports and NRTM, the general plan for switching +from a legacy IRRD to a new IRRDv4 instance would be: * Hold all update emails. * Ensure an NRTM update has run so that the instances are in sync @@ -210,61 +294,51 @@ an IRRDv4 instance would be: * If you were not using synchronised serials, all instances mirroring from your instance, must reload their local copy. -If this is part of a planned migration from a previous version, it is -recommended that you test existing tools and queries against the new IRRDv4 -instance before promoting it to be active. 
- - -Option 2: PostgreSQL replication -------------------------------------------- - -.. danger:: - Since adding this section, an issue was discovered with using PostgreSQL - replication: the `local preload store may not be updated`_ causing - potential stale responses to queries. - - .. _local preload store may not be updated: https://github.com/irrdnet/irrd/issues/656 - -Except for configuration, IRRd stores all its data in the PostgreSQL database. -Redis is used for passing derived data and commands. - -You could run two IRRd instances, each on their own PostgreSQL instance, which -use PostgreSQL replication as the synchronisation mechanism. In the standby -IRRd, configure the instance as ``database_readonly`` to prevent local changes. -Note that this prevents the IRRd instance from making any changes of any kind -to the local database. +It is recommended that you test existing tools and queries against the +new IRRDv4 instance before promoting it to be active. -For Redis, you need to connect all instances to the same Redis instance, -or use `Redis replication`_. -Using PostgreSQL replication solves some of the issues mentioned for other -options, but may have other limitations or issues that are out of scope -for IRRd itself. +Background and design considerations +------------------------------------ -.. _Redis replication: https://redis.io/topics/replication - -GPG keychain imports with PostgreSQL replication -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When you use PostgreSQL replication, the same issue occurs with the GPG -keychain as with NRTM: in order to authenticate updates to authoritative -changes, the PGP keys need to be loaded into the local keychain, which does -not happen for mirrors. +GPG keychain imports +~~~~~~~~~~~~~~~~~~~~ +IRRd uses GnuPG to validate PGP signatures used to authenticate authoritative +changes. This means that all `key-cert` objects need to be inserted into the +GnuPG keychain before users can submit PGP signed updates. 
-When using PostgreSQL replication, IRRd is not aware of how the objects in the -database are being changed. Therefore, you need to run the -``irrd_load_pgp_keys`` command before making a standby instance the active -instance to make sure PGP authentication keeps working. +By default, IRRd only inserts public PGP keys from `key-cert` objects for +authoritative sources - as there is no reason to do PGP signature validation +for non-authoritative sources. However, a standby source needs to have these +keys imported already to become active later. This can be enabled with the +``strict_import_keycert_objects`` setting on the mirror configuration. +When enabled, `key-cert` objects always use the strict importer which includes +importing into the key chain, which allows them to be used for authentication +in the future. +If your IRRd instance already has (or may have) `key-cert` objects that were +imported without ``strict_import_keycert_objects``, you can insert them into the +local keychain with the ``irrd_load_pgp_keys`` command. -Option 3: rebuilding from a periodic SQL dump ---------------------------------------------- -You can make a SQL dump of the PostgreSQL database and load it on another IRRd -instance. This is one of the simplest methods. However, it has one significant -danger: if changes happened in the old active instance, after the dump was made, -the dump is loaded into a new instance, which is then promoted to active, the -changes are not in the dump. This is expected. Worse is that new -changes made in the new active instance will reuse the same serials, and may -not be picked up by NRTM mirrors unless they refresh their copy. +The ``irrd_load_pgp_keys`` command may fail to import certain keys if they use +an unsupported format. It is safe to run multiple times, even if some or all +keys are already in the keychain, and safe to run while IRRd is running. -The same concerns for the GPG keychain with PostgreSQL replication apply -to this method as well. 
+Suppressed objects +~~~~~~~~~~~~~~~~~~ +:doc:`Suppressed objects ` are invisible +to normal queries and to the NRTM feed, but not deleted. They may +become visible again at any point in the future, e.g. by someone +creating a ROA or a change in another object. + +Suppressed objects are included in the PostgreSQL database, but not +in any RPSL exports. Therefore, the RPSL exports can not be used +as a full copy of the database. Otherwise all suppressed objects +would be lost upon promotion of a standby instance, which has +seemingly no effect if they remain suppressed, but also means they +can not become visible later. + +In a PostgreSQL replication setup, only the active instance will run +the object suppression tasks. Standby instances replicate the state +of the database including suppression status and e.g. the ROA +table. diff --git a/docs/admins/configuration.rst b/docs/admins/configuration.rst index 58558d528..082fb9f2f 100644 --- a/docs/admins/configuration.rst +++ b/docs/admins/configuration.rst @@ -196,13 +196,12 @@ General settings for improved performance |br| **Default**: not defined, but required. |br| **Change takes effect**: after full IRRd restart. -* ``database_readonly``: a boolean for whether this instance is - database read only, i.e. IRRd will never write any changes to the SQL database - in any circumstance. This can be used for - :doc:`availability with PostgreSQL replication `. - This setting means that this IRRd instance will never run the RPKI or scope - filter validators, and can not be used if any source has ``authoritative``, +* ``readonly_standby``: a boolean for whether this instance is + in read-only standby mode. See + :doc:`availability with PostgreSQL replication ` + for further details. Can not be used if any source has ``authoritative``, ``import_source`` or ``nrtm_host`` set. + **Do not enable this setting without reading the further documentation on standby setups.** |br| **Default**: ``false``. 
|br| **Change takes effect**: after full IRRd restart. * ``redis_url``: a URL to a Redis instance, e.g. @@ -664,6 +663,7 @@ Sources Sharing password hashes externally is a security risk, the unfiltered data is intended only to support :doc:`availability and data migration `. + **This setting is deprecated and will be removed in IRRD 4.5.** |br| **Default**: not defined, no exports made. |br| **Change takes effect**: after SIGHUP, at the next ``export_timer``. * ``sources.{name}.export_timer``: the time between two full exports of all @@ -686,6 +686,8 @@ Sources Unfiltered means full password hashes are included. Sharing password hashes externally is a security risk, the unfiltered data is intended only to support + :doc:`availability and data migration `. + **This setting is deprecated and will be removed in IRRD 4.5.** |br| **Default**: not defined, all access denied. Clients in ``nrtm_access_list``, if defined, have filtered access. |br| **Change takes effect**: after SIGHUP, upon next request. diff --git a/docs/admins/deployment.rst b/docs/admins/deployment.rst index da8aa4680..5d40e365e 100644 --- a/docs/admins/deployment.rst +++ b/docs/admins/deployment.rst @@ -99,8 +99,6 @@ size of the RPSL text imported. The PostgreSQL database is the only source of IRRd's data. This means you need to run regular backups of the database. - It is also possible to restore data from recent exports, - but changes made since the most recent export will be lost. .. _deployment-redis-configuration: diff --git a/irrd/conf/__init__.py b/irrd/conf/__init__.py index 325e01e9e..c6be01951 100644 --- a/irrd/conf/__init__.py +++ b/irrd/conf/__init__.py @@ -396,12 +396,12 @@ def _validate_subconfig(key, value): "nrtm_host or import_source are set." 
) - if config.get("database_readonly") and ( + if config.get("readonly_standby") and ( details.get("authoritative") or details.get("nrtm_host") or details.get("import_source") ): errors.append( f"Source {name} can not have authoritative, import_source or nrtm_host set " - "when database_readonly is enabled." + "when readonly_standby is enabled." ) number_fields = [ diff --git a/irrd/conf/known_keys.py b/irrd/conf/known_keys.py index ef83f3bca..87476ec96 100644 --- a/irrd/conf/known_keys.py +++ b/irrd/conf/known_keys.py @@ -8,7 +8,7 @@ KNOWN_CONFIG_KEYS = DottedDict( { "database_url": {}, - "database_readonly": {}, + "readonly_standby": {}, "redis_url": {}, "piddir": {}, "user": {}, diff --git a/irrd/conf/test_conf.py b/irrd/conf/test_conf.py index ea809909a..54e39801b 100644 --- a/irrd/conf/test_conf.py +++ b/irrd/conf/test_conf.py @@ -242,7 +242,7 @@ def test_load_valid_reload_invalid_config(self, save_yaml_config, tmpdir, caplog def test_load_invalid_config(self, save_yaml_config, tmpdir): config = { "irrd": { - "database_readonly": True, + "readonly_standby": True, "piddir": str(tmpdir + "/does-not-exist"), "user": "a", "secret_key": "sssssssssssss", @@ -379,12 +379,12 @@ def test_load_invalid_config(self, save_yaml_config, tmpdir): in str(ce.value) ) assert ( - "Source TESTDB can not have authoritative, import_source or nrtm_host set when database_readonly" + "Source TESTDB can not have authoritative, import_source or nrtm_host set when readonly_standby" " is enabled." in str(ce.value) ) assert ( - "Source TESTDB3 can not have authoritative, import_source or nrtm_host set when database_readonly" + "Source TESTDB3 can not have authoritative, import_source or nrtm_host set when readonly_standby" " is enabled." 
in str(ce.value) ) diff --git a/irrd/daemon/main.py b/irrd/daemon/main.py index 28ca02a76..81cd4844a 100755 --- a/irrd/daemon/main.py +++ b/irrd/daemon/main.py @@ -142,10 +142,8 @@ def run_irrd(mirror_frequency: int, config_file_path: str, uid: Optional[int], g mirror_scheduler = MirrorScheduler() - preload_manager = None - if not get_setting(f"database_readonly"): - preload_manager = PreloadStoreManager(name="irrd-preload-store-manager") - preload_manager.start() + preload_manager = PreloadStoreManager(name="irrd-preload-store-manager") + preload_manager.start() uvicorn_process = ExceptionLoggingProcess( target=run_http_server, name="irrd-http-server-listener", args=(config_file_path,) diff --git a/irrd/mirroring/jobs.py b/irrd/mirroring/jobs.py new file mode 100644 index 000000000..582597f46 --- /dev/null +++ b/irrd/mirroring/jobs.py @@ -0,0 +1,49 @@ +import logging +from datetime import datetime +from typing import Optional + +from irrd.storage.database_handler import DatabaseHandler +from irrd.storage.preload import Preloader + +logger = logging.getLogger(__name__) + + +class TransactionTimePreloadSignaller: + """ + Signal a preload based on the last transaction time. 
+ """ + + last_time: Optional[datetime] = None + + def run(self): + self.database_handler = DatabaseHandler() + self.preloader = Preloader(enable_queries=False) + + try: + current_time = self.database_handler.timestamp_last_committed_transaction() + if not self.last_time or self.last_time != current_time: + logger.debug( + ( + f"Signalling preload reload: last transaction completed {current_time}, previous" + f" known last transaction was {self.last_time}" + ), + ) + self.preloader.signal_reload() + self.last_time = current_time + except Exception as exc: + logger.error( + ( + "An exception occurred while attempting to check transaction timing, signalling preload" + f" reload anyways: {exc}" + ), + exc_info=exc, + ) + try: + self.preloader.signal_reload() + except Exception as exc: + logger.error( + f"Failed to send preload reload signal: {exc}", + exc_info=exc, + ) + finally: + self.database_handler.close() diff --git a/irrd/mirroring/mirror_runners_import.py b/irrd/mirroring/mirror_runners_import.py index 7672c86f9..2f666845f 100644 --- a/irrd/mirroring/mirror_runners_import.py +++ b/irrd/mirroring/mirror_runners_import.py @@ -279,10 +279,6 @@ class ROAImportRunner(FileImportRunnerBase): in the configuration. """ - # API consistency with other importers, source is actually ignored - def __init__(self, source=None): - pass - def run(self): self.database_handler = DatabaseHandler() @@ -355,10 +351,6 @@ class ScopeFilterUpdateRunner: is in the configuration. 
""" - # API consistency with other importers, source is actually ignored - def __init__(self, source=None): - pass - def run(self): self.database_handler = DatabaseHandler() diff --git a/irrd/mirroring/scheduler.py b/irrd/mirroring/scheduler.py index 4afc3e95e..732f520fb 100644 --- a/irrd/mirroring/scheduler.py +++ b/irrd/mirroring/scheduler.py @@ -4,12 +4,13 @@ import signal import time from collections import defaultdict -from typing import Dict +from typing import Dict, Optional from setproctitle import setproctitle -from irrd.conf import RPKI_IRR_PSEUDO_SOURCE, get_setting +from irrd.conf import get_setting from irrd.conf.defaults import DEFAULT_SOURCE_EXPORT_TIMER, DEFAULT_SOURCE_IMPORT_TIMER +from irrd.mirroring.jobs import TransactionTimePreloadSignaller from .mirror_runners_export import SourceExportRunner from .mirror_runners_import import ( @@ -57,7 +58,7 @@ def run(self): class MirrorScheduler: """ - Scheduler for mirroring processes. + Scheduler for periodic processes, mainly mirroring. For each time run() is called, will start a process for each mirror database unless a process is still running for that database (which is likely to be @@ -74,14 +75,18 @@ def __init__(self, *args, **kwargs): self.previous_scopefilter_prefixes = None self.previous_scopefilter_asns = None self.previous_scopefilter_excluded = None + # This signaller is special in that it does not run in a separate + # process and keeps state in the instance. 
+ self.transaction_time_preload_signaller = TransactionTimePreloadSignaller() def run(self) -> None: - if get_setting("database_readonly"): + if get_setting("readonly_standby"): + self.transaction_time_preload_signaller.run() return if get_setting("rpki.roa_source"): import_timer = int(get_setting("rpki.roa_import_timer")) - self.run_if_relevant(RPKI_IRR_PSEUDO_SOURCE, ROAImportRunner, import_timer) + self.run_if_relevant(None, ROAImportRunner, import_timer) if get_setting("sources") and any( [ @@ -90,10 +95,10 @@ def run(self) -> None: ] ): import_timer = int(get_setting("route_object_preference.update_timer")) - self.run_if_relevant("routepref", RoutePreferenceUpdateRunner, import_timer) + self.run_if_relevant(None, RoutePreferenceUpdateRunner, import_timer) if self._check_scopefilter_change(): - self.run_if_relevant("scopefilter", ScopeFilterUpdateRunner, 0) + self.run_if_relevant(None, ScopeFilterUpdateRunner, 0) sources_started = 0 for source in get_setting("sources", {}).keys(): @@ -150,15 +155,23 @@ def _check_scopefilter_change(self) -> bool: return True return False - def run_if_relevant(self, source: str, runner_class, timer: int) -> bool: - process_name = f"{runner_class.__name__}-{source}" + def run_if_relevant(self, source: Optional[str], runner_class, timer: int) -> bool: + process_name = runner_class.__name__ + if source: + process_name += f"-{source}" current_time = time.time() has_expired = (self.last_started_time[process_name] + timer) < current_time if not has_expired or process_name in self.processes: return False - logger.debug(f"Started new process {process_name} for mirror import/export for {source}") - initiator = runner_class(source=source) + kwargs = {} + msg = f"Started new scheduled process {process_name}" + if source: + msg += f"for mirror import/export for {source}" + kwargs["source"] = source + logger.debug(msg) + + initiator = runner_class(**kwargs) process = ScheduledTaskProcess(runner=initiator, name=process_name) 
self.processes[process_name] = process process.start() diff --git a/irrd/mirroring/tests/test_jobs.py b/irrd/mirroring/tests/test_jobs.py new file mode 100644 index 000000000..06ad6f8c5 --- /dev/null +++ b/irrd/mirroring/tests/test_jobs.py @@ -0,0 +1,57 @@ +import datetime +from unittest.mock import create_autospec + +from irrd.storage.database_handler import DatabaseHandler +from irrd.storage.preload import Preloader + +from ...utils.test_utils import flatten_mock_calls +from ..jobs import TransactionTimePreloadSignaller + + +class TestTransactionTimePreloadSignaller: + def test_run(self, monkeypatch): + mock_dh = create_autospec(DatabaseHandler) + mock_preloader = create_autospec(Preloader) + + monkeypatch.setattr("irrd.mirroring.jobs.DatabaseHandler", lambda: mock_dh) + monkeypatch.setattr("irrd.mirroring.jobs.Preloader", lambda enable_queries: mock_preloader) + + mock_dh.timestamp_last_committed_transaction = lambda: datetime.datetime(2023, 1, 1) + + signaller = TransactionTimePreloadSignaller() + signaller.run() + signaller.run() + # Should only have one call + assert flatten_mock_calls(mock_preloader) == [["signal_reload", (), {}]] + + mock_preloader.reset_mock() + mock_dh.timestamp_last_committed_transaction = lambda: datetime.datetime(2023, 1, 2) + signaller.run() + assert flatten_mock_calls(mock_preloader) == [["signal_reload", (), {}]] + + def test_fail_database_query(self, monkeypatch, caplog): + mock_dh = create_autospec(DatabaseHandler) + mock_preloader = create_autospec(Preloader) + + monkeypatch.setattr("irrd.mirroring.jobs.DatabaseHandler", lambda: mock_dh) + monkeypatch.setattr("irrd.mirroring.jobs.Preloader", lambda enable_queries: mock_preloader) + + mock_dh.timestamp_last_committed_transaction.side_effect = Exception() + + signaller = TransactionTimePreloadSignaller() + signaller.run() + assert flatten_mock_calls(mock_preloader) == [["signal_reload", (), {}]] + assert "exception occurred" in caplog.text + + def test_fail_preload(self, 
monkeypatch, caplog): + mock_dh = create_autospec(DatabaseHandler) + mock_preloader = create_autospec(Preloader) + + monkeypatch.setattr("irrd.mirroring.jobs.DatabaseHandler", lambda: mock_dh) + monkeypatch.setattr("irrd.mirroring.jobs.Preloader", lambda enable_queries: mock_preloader) + + mock_preloader.signal_reload.side_effect = Exception() + + signaller = TransactionTimePreloadSignaller() + signaller.run() + assert "Failed to send" in caplog.text diff --git a/irrd/mirroring/tests/test_scheduler.py b/irrd/mirroring/tests/test_scheduler.py index 0281d131d..156d60c92 100644 --- a/irrd/mirroring/tests/test_scheduler.py +++ b/irrd/mirroring/tests/test_scheduler.py @@ -1,47 +1,57 @@ import threading import time +from unittest.mock import create_autospec +from irrd.mirroring.jobs import TransactionTimePreloadSignaller + +from ...utils.test_utils import flatten_mock_calls from ..scheduler import MAX_SIMULTANEOUS_RUNS, MirrorScheduler, ScheduledTaskProcess thread_run_count = 0 class TestMirrorScheduler: - def test_scheduler_database_readonly(self, monkeypatch, config_override): + def test_scheduler_standby_preload_signaller(self, monkeypatch, config_override): + mock_preload_signaller = create_autospec(TransactionTimePreloadSignaller) + monkeypatch.setattr( + "irrd.mirroring.scheduler.TransactionTimePreloadSignaller", mock_preload_signaller + ) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) + monkeypatch.setattr("irrd.mirroring.scheduler.RPSLMirrorImportUpdateRunner", MockRunner) global thread_run_count thread_run_count = 0 config_override( { - "database_readonly": True, - "sources": { - "TEST": { - "import_source": "url", - "import_timer": 0, - } - }, + "standby": True, + "readonly_standby": True, } ) - monkeypatch.setattr("irrd.mirroring.scheduler.RPSLMirrorImportUpdateRunner", MockRunner) scheduler = MirrorScheduler() + assert flatten_mock_calls(mock_preload_signaller) == [["", (), {}]] + 
mock_preload_signaller.reset_mock() + scheduler.run() scheduler.run() + assert flatten_mock_calls(mock_preload_signaller) == [["run", (), {}], ["run", (), {}]] + assert thread_run_count == 0 def test_scheduler_runs_rpsl_import(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { "import_source": "url", "import_timer": 0, } - } + }, } ) @@ -66,12 +76,14 @@ def test_scheduler_runs_rpsl_import(self, monkeypatch, config_override): assert len(scheduler.processes.items()) == 0 def test_scheduler_limits_simultaneous_runs(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { "import_source": "url", @@ -89,7 +101,7 @@ def test_scheduler_limits_simultaneous_runs(self, monkeypatch, config_override): "import_source": "url", "import_timer": 0, }, - } + }, } ) @@ -103,6 +115,7 @@ def test_scheduler_limits_simultaneous_runs(self, monkeypatch, config_override): assert thread_run_count == MAX_SIMULTANEOUS_RUNS def test_scheduler_runs_roa_import(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 @@ -121,6 +134,7 @@ def test_scheduler_runs_roa_import(self, monkeypatch, config_override): assert thread_run_count == 1 def test_scheduler_runs_scopefilter(self, monkeypatch, config_override): + 
monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 @@ -185,6 +199,7 @@ def test_scheduler_runs_scopefilter(self, monkeypatch, config_override): assert thread_run_count == 3 def test_scheduler_runs_route_preference(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 @@ -210,18 +225,20 @@ def test_scheduler_runs_route_preference(self, monkeypatch, config_override): assert thread_run_count == 1 def test_scheduler_import_ignores_timer_not_expired(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { "import_source": "url", "import_timer": 100, } - } + }, } ) @@ -239,18 +256,20 @@ def test_scheduler_import_ignores_timer_not_expired(self, monkeypatch, config_ov assert thread_run_count == 1 def test_scheduler_runs_export(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { "export_destination": "url", "export_timer": 0, } - } + }, } ) @@ -272,12 +291,13 @@ def test_scheduler_export_ignores_timer_not_expired(self, monkeypatch, config_ov config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { 
"export_destination": "url", "export_timer": 100, } - } + }, } ) @@ -307,8 +327,8 @@ def test_task(self): class MockRunner: run_sleep = True - def __init__(self, source): - assert source in ["TEST", "TEST2", "TEST3", "TEST4", "RPKI", "scopefilter", "routepref"] + def __init__(self, source=None): + assert source in ["TEST", "TEST2", "TEST3", "TEST4", None] def run(self): global thread_run_count diff --git a/irrd/scripts/expire_journal.py b/irrd/scripts/expire_journal.py index 04cd15f49..c6c905ae0 100755 --- a/irrd/scripts/expire_journal.py +++ b/irrd/scripts/expire_journal.py @@ -83,8 +83,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) try: diff --git a/irrd/scripts/irrd_control.py b/irrd/scripts/irrd_control.py index e4b9dbf9e..55207441b 100755 --- a/irrd/scripts/irrd_control.py +++ b/irrd/scripts/irrd_control.py @@ -20,10 +20,10 @@ logger = logging.getLogger(__name__) -def check_database_readonly(f): +def check_readonly_standby(f): def new_func(*args, **kwargs): - if get_setting("database_readonly"): - raise click.ClickException("Unable to run this command, because database_readonly is set.") + if get_setting("readonly_standby"): + raise click.ClickException("Unable to run this command, because readonly_standby is set.") return f(*args, **kwargs) return update_wrapper(new_func, f) @@ -43,7 +43,7 @@ def cli(config): @cli.command() @click.argument("email") -@check_database_readonly +@check_readonly_standby @session_provider_manager_sync def user_mfa_clear(email, session_provider: ORMSessionProvider): """ @@ -92,7 +92,7 @@ def user_mfa_clear(email, session_provider: ORMSessionProvider): @cli.command() @click.argument("email") @click.option("--enable/--disable", default=True) -@check_database_readonly 
+@check_readonly_standby @session_provider_manager_sync def user_change_override(email: str, enable: bool, session_provider: ORMSessionProvider): """ diff --git a/irrd/scripts/load_database.py b/irrd/scripts/load_database.py index f4e07dcb8..1bf146442 100755 --- a/irrd/scripts/load_database.py +++ b/irrd/scripts/load_database.py @@ -72,8 +72,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) sys.exit(load(args.source, args.input_file, args.serial)) diff --git a/irrd/scripts/mirror_force_reload.py b/irrd/scripts/mirror_force_reload.py index de8ad899e..bcee9878a 100755 --- a/irrd/scripts/mirror_force_reload.py +++ b/irrd/scripts/mirror_force_reload.py @@ -36,8 +36,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) set_force_reload(args.source) diff --git a/irrd/scripts/rpsl_read.py b/irrd/scripts/rpsl_read.py index 7a31a74d1..c11e9d01b 100755 --- a/irrd/scripts/rpsl_read.py +++ b/irrd/scripts/rpsl_read.py @@ -107,8 +107,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) RPSLParse().main(args.input_file, args.strict_validation, args.database, not args.hide_info) diff --git a/irrd/scripts/set_last_modified_auth.py b/irrd/scripts/set_last_modified_auth.py index 8052d2de9..faff61411 100755 --- 
a/irrd/scripts/set_last_modified_auth.py +++ b/irrd/scripts/set_last_modified_auth.py @@ -57,8 +57,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) sys.exit(set_last_modified()) diff --git a/irrd/scripts/tests/test_irrd_control.py b/irrd/scripts/tests/test_irrd_control.py index c9774c414..a2cc9ec83 100644 --- a/irrd/scripts/tests/test_irrd_control.py +++ b/irrd/scripts/tests/test_irrd_control.py @@ -75,13 +75,13 @@ def test_user_does_not_exist(self, irrd_db_session_with_user, smtpd_override): assert "No user found" in result.output assert not smtpd_override.messages - def test_database_readonly(self, irrd_db_session_with_user, config_override, smtpd_override): - config_override({"database_readonly": True}) + def test_readonly_standby(self, irrd_db_session_with_user, config_override, smtpd_override): + config_override({"readonly_standby": True}) runner = CliRunner() result = runner.invoke(user_mfa_clear, ["user.email"]) assert result.exit_code == 1 - assert "database_readonly" in result.output + assert "readonly_standby" in result.output assert not smtpd_override.messages @@ -162,11 +162,11 @@ def test_no_mfa(self, irrd_db_session_with_user): assert result.exit_code == 1 assert "has no two-factor" in result.output - def test_database_readonly(self, irrd_db_session_with_user, config_override): - config_override({"database_readonly": True}) + def test_readonly_standby(self, irrd_db_session_with_user, config_override): + config_override({"readonly_standby": True}) session_provider, user = irrd_db_session_with_user runner = CliRunner() result = runner.invoke(user_change_override, [user.email, "--enable"], input="y") assert result.exit_code == 1 - assert "database_readonly" in result.output + assert "readonly_standby" 
in result.output diff --git a/irrd/scripts/update_database.py b/irrd/scripts/update_database.py index d546a81b3..dd301c360 100644 --- a/irrd/scripts/update_database.py +++ b/irrd/scripts/update_database.py @@ -64,8 +64,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) sys.exit(update(args.source, args.input_file)) diff --git a/irrd/storage/database_handler.py b/irrd/storage/database_handler.py index 26d37ec2e..9d2ec0a5d 100644 --- a/irrd/storage/database_handler.py +++ b/irrd/storage/database_handler.py @@ -92,10 +92,10 @@ def __init__(self, readonly=False): If readonly is True, this instance will expect read queries only. No transaction will be started, all queries will use autocommit. - Readonly is always true if database_readonly is set in the config. + Readonly is always true if readonly_standby is set in the config. """ self.status_tracker = None - if get_setting("database_readonly"): + if get_setting("readonly_standby"): self.readonly = True else: self.readonly = readonly @@ -586,7 +586,7 @@ def delete_rpsl_object( table.c.prefix, table.c.object_text, ) - results = self._connection.execute(stmt) + results = self.execute_statement(stmt) if not self._check_single_row_match(results, user_identifier=f"{rpsl_pk}/{source}"): return None @@ -817,6 +817,10 @@ def set_force_reload(self, source): "current settings, actual reload process wll take place in next scheduled importer run" ) + def timestamp_last_committed_transaction(self) -> datetime: + result = self.execute_statement("SELECT timestamp FROM pg_last_committed_xact()") + return result.fetchone()["timestamp"] + def record_serial_newest_mirror(self, source: str, serial: int) -> None: """ Record that a mirror was updated to a certain serial. 
diff --git a/irrd/storage/preload.py b/irrd/storage/preload.py index 4fab19e85..ae32c86cf 100644 --- a/irrd/storage/preload.py +++ b/irrd/storage/preload.py @@ -99,7 +99,7 @@ def __init__(self, enable_queries=True): callback=self._load_routes_into_memory, pubsub=self._pubsub, sleep_time=5, daemon=True ) self._pubsub_thread.start() - if get_setting("database_readonly"): # pragma: no cover + if get_setting("readonly_standby"): # pragma: no cover # If this instance is readonly, another IRRd process will be updating # the store, and likely has already done so, meaning we can try to load # from Redis right away instead of waiting for a signal. diff --git a/irrd/storage/tests/test_database.py b/irrd/storage/tests/test_database.py index e2d24b674..2b98d1be0 100644 --- a/irrd/storage/tests/test_database.py +++ b/irrd/storage/tests/test_database.py @@ -96,7 +96,7 @@ def test_readonly(self, monkeypatch, irrd_db_mock_preload, config_override): config_override( { - "database_readonly": True, + "readonly_standby": True, } ) @@ -215,6 +215,7 @@ def test_object_writing_and_status_checking(self, monkeypatch, irrd_db_mock_prel self.dh.upsert_rpsl_object(rpsl_object_route_v6, JournalEntryOrigin.auth_change, source_serial=43) self.dh.commit() + initial_tx_timestamp = self.dh.timestamp_last_committed_transaction() self.dh.refresh_connection() # There should be two entries with MNT-CORRECT in the db now. @@ -442,6 +443,8 @@ def test_object_writing_and_status_checking(self, monkeypatch, irrd_db_mock_prel assert not len(list(self.dh.execute_query(DatabaseStatusQuery().sources(["TEST"])))) assert len(list(self.dh.execute_query(RPSLDatabaseQuery().sources(["TEST2"])))) == 1 + assert self.dh.timestamp_last_committed_transaction() > initial_tx_timestamp + self.dh.close() assert flatten_mock_calls(self.dh.changed_objects_tracker.preloader.signal_reload) == [