From ce99b799e7b28ceaab1301fecdbfdf25544bd554 Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Tue, 11 Jul 2023 15:12:48 +0200 Subject: [PATCH 01/10] Clean up some details in mirroring scheduler. --- irrd/mirroring/mirror_runners_import.py | 8 ------- irrd/mirroring/scheduler.py | 28 ++++++++++++++++--------- irrd/mirroring/tests/test_scheduler.py | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/irrd/mirroring/mirror_runners_import.py b/irrd/mirroring/mirror_runners_import.py index 7672c86f9..2f666845f 100644 --- a/irrd/mirroring/mirror_runners_import.py +++ b/irrd/mirroring/mirror_runners_import.py @@ -279,10 +279,6 @@ class ROAImportRunner(FileImportRunnerBase): in the configuration. """ - # API consistency with other importers, source is actually ignored - def __init__(self, source=None): - pass - def run(self): self.database_handler = DatabaseHandler() @@ -355,10 +351,6 @@ class ScopeFilterUpdateRunner: is in the configuration. """ - # API consistency with other importers, source is actually ignored - def __init__(self, source=None): - pass - def run(self): self.database_handler = DatabaseHandler() diff --git a/irrd/mirroring/scheduler.py b/irrd/mirroring/scheduler.py index 4afc3e95e..cbc1a5ae4 100644 --- a/irrd/mirroring/scheduler.py +++ b/irrd/mirroring/scheduler.py @@ -4,11 +4,11 @@ import signal import time from collections import defaultdict -from typing import Dict +from typing import Dict, Optional from setproctitle import setproctitle -from irrd.conf import RPKI_IRR_PSEUDO_SOURCE, get_setting +from irrd.conf import get_setting from irrd.conf.defaults import DEFAULT_SOURCE_EXPORT_TIMER, DEFAULT_SOURCE_IMPORT_TIMER from .mirror_runners_export import SourceExportRunner @@ -57,7 +57,7 @@ def run(self): class MirrorScheduler: """ - Scheduler for mirroring processes. + Scheduler for periodic processes, mainly mirroring. 
For each time run() is called, will start a process for each mirror database unless a process is still running for that database (which is likely to be @@ -81,7 +81,7 @@ def run(self) -> None: if get_setting("rpki.roa_source"): import_timer = int(get_setting("rpki.roa_import_timer")) - self.run_if_relevant(RPKI_IRR_PSEUDO_SOURCE, ROAImportRunner, import_timer) + self.run_if_relevant(None, ROAImportRunner, import_timer) if get_setting("sources") and any( [ @@ -90,10 +90,10 @@ def run(self) -> None: ] ): import_timer = int(get_setting("route_object_preference.update_timer")) - self.run_if_relevant("routepref", RoutePreferenceUpdateRunner, import_timer) + self.run_if_relevant(None, RoutePreferenceUpdateRunner, import_timer) if self._check_scopefilter_change(): - self.run_if_relevant("scopefilter", ScopeFilterUpdateRunner, 0) + self.run_if_relevant(None, ScopeFilterUpdateRunner, 0) sources_started = 0 for source in get_setting("sources", {}).keys(): @@ -150,15 +150,23 @@ def _check_scopefilter_change(self) -> bool: return True return False - def run_if_relevant(self, source: str, runner_class, timer: int) -> bool: - process_name = f"{runner_class.__name__}-{source}" + def run_if_relevant(self, source: Optional[str], runner_class, timer: int) -> bool: + process_name = runner_class.__name__ + if source: + process_name += f"-{source}" current_time = time.time() has_expired = (self.last_started_time[process_name] + timer) < current_time if not has_expired or process_name in self.processes: return False - logger.debug(f"Started new process {process_name} for mirror import/export for {source}") - initiator = runner_class(source=source) + kwargs = {} + msg = f"Started new scheduled process {process_name}" + if source: + msg += f"for mirror import/export for {source}" + kwargs["source"] = source + logger.debug(msg) + + initiator = runner_class(**kwargs) process = ScheduledTaskProcess(runner=initiator, name=process_name) self.processes[process_name] = process process.start() diff --git a/irrd/mirroring/tests/test_scheduler.py b/irrd/mirroring/tests/test_scheduler.py index 0281d131d..46d751a6c 100644 --- a/irrd/mirroring/tests/test_scheduler.py +++ b/irrd/mirroring/tests/test_scheduler.py @@ -307,8 +307,8 @@ def test_task(self): class MockRunner: run_sleep = True - def __init__(self, source): - assert source in ["TEST", "TEST2", "TEST3", "TEST4", "RPKI", "scopefilter", "routepref"] + def __init__(self, source=None): + assert source in ["TEST", "TEST2", "TEST3", "TEST4", None] def run(self): global thread_run_count From 4cb5edc3a13e7ebaa3e08b387481ff2dcbb3dc9b Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Tue, 11 Jul 2023 16:27:32 +0200 Subject: [PATCH 02/10] try testing tx q --- .circleci/config.yml | 4 ++++ irrd/storage/database_handler.py | 6 +++++- irrd/storage/tests/test_database.py | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2b3947cd9..19a6aeb63 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -197,6 +197,7 @@ jobs: POSTGRES_USER: root POSTGRES_DB: circle_test POSTGRES_HOST_AUTH_METHOD: trust + command: postgres -c track_commit_timestamp=true - image: cimg/redis:<< parameters.redis_version >> # - image: cimg/rust:1.65 @@ -233,6 +234,7 @@ jobs: POSTGRES_USER: root POSTGRES_DB: circle_test POSTGRES_HOST_AUTH_METHOD: trust + command: postgres -c track_commit_timestamp=true - image: cimg/redis:<< parameters.redis_version >> # - image: cimg/rust:1.65 @@ -271,6 +273,7 @@ jobs: POSTGRES_USER: root POSTGRES_DB: 
circle_test POSTGRES_HOST_AUTH_METHOD: trust + command: postgres -c track_commit_timestamp=true - image: cimg/redis:<< parameters.redis_version >> # - image: cimg/rust:1.65 @@ -310,6 +313,7 @@ jobs: POSTGRES_USER: root POSTGRES_DB: circle_test POSTGRES_HOST_AUTH_METHOD: trust + command: postgres -c track_commit_timestamp=true - image: cimg/redis:<< parameters.redis_version >> # - image: cimg/rust:1.65 diff --git a/irrd/storage/database_handler.py b/irrd/storage/database_handler.py index 26d37ec2e..454bdea49 100644 --- a/irrd/storage/database_handler.py +++ b/irrd/storage/database_handler.py @@ -586,7 +586,7 @@ def delete_rpsl_object( table.c.prefix, table.c.object_text, ) - results = self._connection.execute(stmt) + results = self.execute_statement(stmt) if not self._check_single_row_match(results, user_identifier=f"{rpsl_pk}/{source}"): return None @@ -817,6 +817,10 @@ def set_force_reload(self, source): "current settings, actual reload process wll take place in next scheduled importer run" ) + def timestamp_last_committed_transaction(self) -> datetime: + result = self.execute_statement("SELECT timestamp FROM pg_last_committed_xact()") + return result.fetchone()["timestamp"] + def record_serial_newest_mirror(self, source: str, serial: int) -> None: """ Record that a mirror was updated to a certain serial. diff --git a/irrd/storage/tests/test_database.py b/irrd/storage/tests/test_database.py index e2d24b674..b7ac5eb70 100644 --- a/irrd/storage/tests/test_database.py +++ b/irrd/storage/tests/test_database.py @@ -215,6 +215,7 @@ def test_object_writing_and_status_checking(self, monkeypatch, irrd_db_mock_prel self.dh.upsert_rpsl_object(rpsl_object_route_v6, JournalEntryOrigin.auth_change, source_serial=43) self.dh.commit() + initial_tx_timestamp = self.dh.timestamp_last_committed_transaction() self.dh.refresh_connection() # There should be two entries with MNT-CORRECT in the db now. 
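The new ``timestamp_last_committed_transaction()`` helper above is the reason the CI PostgreSQL containers now start with ``-c track_commit_timestamp=true``: ``pg_last_committed_xact()`` refuses to run while commit timestamp tracking is disabled. As a standalone illustration of that dependency (not part of this patch; the SQLAlchemy engine and the DSN below are placeholder assumptions)::

    # Sketch of the behaviour timestamp_last_committed_transaction() relies on.
    # The DSN is a placeholder; any reachable PostgreSQL database will do.
    from sqlalchemy import create_engine, text

    engine = create_engine("postgresql:///circle_test")  # hypothetical DSN

    with engine.connect() as conn:
        # pg_last_committed_xact() only works with track_commit_timestamp on;
        # otherwise PostgreSQL raises "could not get commit timestamp data".
        if conn.execute(text("SHOW track_commit_timestamp")).scalar() != "on":
            raise RuntimeError("enable track_commit_timestamp and restart PostgreSQL first")
        row = conn.execute(text("SELECT timestamp FROM pg_last_committed_xact()")).fetchone()
        # The timestamp stays NULL until a transaction commits with tracking enabled.
        print(row.timestamp if row else None)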
@@ -442,6 +443,8 @@ def test_object_writing_and_status_checking(self, monkeypatch, irrd_db_mock_prel assert not len(list(self.dh.execute_query(DatabaseStatusQuery().sources(["TEST"])))) assert len(list(self.dh.execute_query(RPSLDatabaseQuery().sources(["TEST2"])))) == 1 + assert self.dh.timestamp_last_committed_transaction() > initial_tx_timestamp + self.dh.close() assert flatten_mock_calls(self.dh.changed_objects_tracker.preloader.signal_reload) == [ From 02b65e2b3a1a713600ff031358187f7a654f43d7 Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Tue, 11 Jul 2023 20:26:47 +0200 Subject: [PATCH 03/10] add signaller --- irrd/conf/known_keys.py | 1 + irrd/mirroring/jobs.py | 49 ++++++++++++++++++++++ irrd/mirroring/scheduler.py | 5 +++ irrd/mirroring/tests/test_jobs.py | 57 ++++++++++++++++++++++++++ irrd/mirroring/tests/test_scheduler.py | 47 ++++++++++++++++++--- 5 files changed, 154 insertions(+), 5 deletions(-) create mode 100644 irrd/mirroring/jobs.py create mode 100644 irrd/mirroring/tests/test_jobs.py diff --git a/irrd/conf/known_keys.py b/irrd/conf/known_keys.py index ef83f3bca..f3e515719 100644 --- a/irrd/conf/known_keys.py +++ b/irrd/conf/known_keys.py @@ -9,6 +9,7 @@ { "database_url": {}, "database_readonly": {}, + "standby": {}, "redis_url": {}, "piddir": {}, "user": {}, diff --git a/irrd/mirroring/jobs.py b/irrd/mirroring/jobs.py new file mode 100644 index 000000000..034ee022c --- /dev/null +++ b/irrd/mirroring/jobs.py @@ -0,0 +1,49 @@ +import logging +from datetime import datetime +from typing import Optional + +from irrd.storage.database_handler import DatabaseHandler +from irrd.storage.preload import Preloader + +logger = logging.getLogger(__name__) + + +class TransactionTimePreloadSignaller: + """ + Signal a preload based on the last transaction time. 
+ """ + + last_time = Optional[datetime] + + def run(self): + self.database_handler = DatabaseHandler() + self.preloader = Preloader(enable_queries=False) + + try: + current_time = self.database_handler.timestamp_last_committed_transaction() + if not self.last_time or self.last_time != current_time: + self.preloader.signal_reload() + logger.debug( + ( + f"Signalling preload reload: last transaction completed {current_time}, previous" + f" known last transaction was {self.last_time}" + ), + ) + self.last_time = current_time + except Exception as exc: + logger.error( + ( + "An exception occurred while attempting to check transaction timing, signalling preload" + f" reload anyways: {exc}" + ), + exc_info=exc, + ) + try: + self.preloader.signal_reload() + except Exception as exc: + logger.error( + f"Failed to send preload reload signal: {exc}", + exc_info=exc, + ) + finally: + self.database_handler.close() diff --git a/irrd/mirroring/scheduler.py b/irrd/mirroring/scheduler.py index cbc1a5ae4..f6256118f 100644 --- a/irrd/mirroring/scheduler.py +++ b/irrd/mirroring/scheduler.py @@ -10,6 +10,7 @@ from irrd.conf import get_setting from irrd.conf.defaults import DEFAULT_SOURCE_EXPORT_TIMER, DEFAULT_SOURCE_IMPORT_TIMER +from irrd.mirroring.jobs import TransactionTimePreloadSignaller from .mirror_runners_export import SourceExportRunner from .mirror_runners_import import ( @@ -74,8 +75,12 @@ def __init__(self, *args, **kwargs): self.previous_scopefilter_prefixes = None self.previous_scopefilter_asns = None self.previous_scopefilter_excluded = None + self.transaction_time_preload_signaller = TransactionTimePreloadSignaller() def run(self) -> None: + if get_setting("standby"): + self.transaction_time_preload_signaller.run() + if get_setting("database_readonly"): return diff --git a/irrd/mirroring/tests/test_jobs.py b/irrd/mirroring/tests/test_jobs.py new file mode 100644 index 000000000..06ad6f8c5 --- /dev/null +++ b/irrd/mirroring/tests/test_jobs.py @@ -0,0 +1,57 @@ +import datetime +from unittest.mock import create_autospec + +from irrd.storage.database_handler import DatabaseHandler +from irrd.storage.preload import Preloader + +from ...utils.test_utils import flatten_mock_calls +from ..jobs import TransactionTimePreloadSignaller + + +class TestTransactionTimePreloadSignaller: + def test_run(self, monkeypatch): + mock_dh = create_autospec(DatabaseHandler) + mock_preloader = create_autospec(Preloader) + + monkeypatch.setattr("irrd.mirroring.jobs.DatabaseHandler", lambda: mock_dh) + monkeypatch.setattr("irrd.mirroring.jobs.Preloader", lambda enable_queries: mock_preloader) + + mock_dh.timestamp_last_committed_transaction = lambda: datetime.datetime(2023, 1, 1) + + signaller = TransactionTimePreloadSignaller() + signaller.run() + signaller.run() + # Should only have one call + assert flatten_mock_calls(mock_preloader) == [["signal_reload", (), {}]] + + mock_preloader.reset_mock() + mock_dh.timestamp_last_committed_transaction = lambda: datetime.datetime(2023, 1, 2) + signaller.run() + assert flatten_mock_calls(mock_preloader) == [["signal_reload", (), {}]] + + def test_fail_database_query(self, monkeypatch, caplog): + mock_dh = create_autospec(DatabaseHandler) + mock_preloader = create_autospec(Preloader) + + monkeypatch.setattr("irrd.mirroring.jobs.DatabaseHandler", lambda: mock_dh) + monkeypatch.setattr("irrd.mirroring.jobs.Preloader", lambda enable_queries: mock_preloader) + + mock_dh.timestamp_last_committed_transaction.side_effect = Exception() + + signaller = TransactionTimePreloadSignaller() + 
signaller.run() + assert flatten_mock_calls(mock_preloader) == [["signal_reload", (), {}]] + assert "exception occurred" in caplog.text + + def test_fail_preload(self, monkeypatch, caplog): + mock_dh = create_autospec(DatabaseHandler) + mock_preloader = create_autospec(Preloader) + + monkeypatch.setattr("irrd.mirroring.jobs.DatabaseHandler", lambda: mock_dh) + monkeypatch.setattr("irrd.mirroring.jobs.Preloader", lambda enable_queries: mock_preloader) + + mock_preloader.signal_reload.side_effect = Exception() + + signaller = TransactionTimePreloadSignaller() + signaller.run() + assert "Failed to send" in caplog.text diff --git a/irrd/mirroring/tests/test_scheduler.py b/irrd/mirroring/tests/test_scheduler.py index 46d751a6c..acd3de6fc 100644 --- a/irrd/mirroring/tests/test_scheduler.py +++ b/irrd/mirroring/tests/test_scheduler.py @@ -1,13 +1,38 @@ import threading import time +from unittest.mock import create_autospec +from irrd.mirroring.jobs import TransactionTimePreloadSignaller + +from ...utils.test_utils import flatten_mock_calls from ..scheduler import MAX_SIMULTANEOUS_RUNS, MirrorScheduler, ScheduledTaskProcess thread_run_count = 0 class TestMirrorScheduler: + def test_scheduler_standby_preload_signaller(self, monkeypatch, config_override): + mock_preload_signaller = create_autospec(TransactionTimePreloadSignaller) + monkeypatch.setattr( + "irrd.mirroring.scheduler.TransactionTimePreloadSignaller", mock_preload_signaller + ) + + config_override( + { + "standby": True, + "database_readonly": True, + } + ) + + scheduler = MirrorScheduler() + assert flatten_mock_calls(mock_preload_signaller) == [["", (), {}]] + mock_preload_signaller.reset_mock() + scheduler.run() + scheduler.run() + assert flatten_mock_calls(mock_preload_signaller) == [["run", (), {}], ["run", (), {}]] + def test_scheduler_database_readonly(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 @@ -30,18 +55,20 @@ def test_scheduler_database_readonly(self, monkeypatch, config_override): assert thread_run_count == 0 def test_scheduler_runs_rpsl_import(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { "import_source": "url", "import_timer": 0, } - } + }, } ) @@ -66,12 +93,14 @@ def test_scheduler_runs_rpsl_import(self, monkeypatch, config_override): assert len(scheduler.processes.items()) == 0 def test_scheduler_limits_simultaneous_runs(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { "import_source": "url", @@ -89,7 +118,7 @@ def test_scheduler_limits_simultaneous_runs(self, monkeypatch, config_override): "import_source": "url", "import_timer": 0, }, - } + }, } ) @@ -103,6 +132,7 @@ def test_scheduler_limits_simultaneous_runs(self, monkeypatch, config_override): assert thread_run_count == MAX_SIMULTANEOUS_RUNS def 
test_scheduler_runs_roa_import(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 @@ -121,6 +151,7 @@ def test_scheduler_runs_roa_import(self, monkeypatch, config_override): assert thread_run_count == 1 def test_scheduler_runs_scopefilter(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 @@ -185,6 +216,7 @@ def test_scheduler_runs_scopefilter(self, monkeypatch, config_override): assert thread_run_count == 3 def test_scheduler_runs_route_preference(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 @@ -210,18 +242,20 @@ def test_scheduler_runs_route_preference(self, monkeypatch, config_override): assert thread_run_count == 1 def test_scheduler_import_ignores_timer_not_expired(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { "import_source": "url", "import_timer": 100, } - } + }, } ) @@ -239,18 +273,20 @@ def test_scheduler_import_ignores_timer_not_expired(self, monkeypatch, config_ov assert thread_run_count == 1 def test_scheduler_runs_export(self, monkeypatch, config_override): + monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) global thread_run_count thread_run_count = 0 config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { "export_destination": "url", "export_timer": 0, } - } + }, } ) @@ -272,12 +308,13 @@ def test_scheduler_export_ignores_timer_not_expired(self, monkeypatch, config_ov config_override( { + "rpki": {"roa_source": None}, "sources": { "TEST": { "export_destination": "url", "export_timer": 100, } - } + }, } ) From d57a3b15e4c4eb122c9d4b4947c39c88e4d95e48 Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Tue, 11 Jul 2023 21:22:54 +0200 Subject: [PATCH 04/10] add docs --- docs/admins/availability-and-migration.rst | 279 ++++++++++----------- docs/admins/configuration.rst | 10 + irrd/conf/__init__.py | 3 + irrd/conf/test_conf.py | 4 +- 4 files changed, 149 insertions(+), 147 deletions(-) diff --git a/docs/admins/availability-and-migration.rst b/docs/admins/availability-and-migration.rst index 9a50aaffd..82126460a 100644 --- a/docs/admins/availability-and-migration.rst +++ b/docs/admins/availability-and-migration.rst @@ -47,19 +47,115 @@ This document mainly discusses three kinds of IRRd instances: that processes authoritative changes, and is the single source of truth, at one point in time. IRRd does not support having multiple active instances. -This document suggest three different approaches for configuring this, -each with their own upsides and downsides. +.. 
warning:: + Previous versions of IRRd and this documentation suggested standby servers + with NRTM as an option. This option is strongly recommended against, due to + incompatibility with :doc:`object suppression ` + along with other issues regarding mirror synchronisation. + The ``sources.{name}.export_destination_unfiltered`` and + ``sources.{name}.export_destination`` settings are deprecated. + + +Using PostgreSQL replication for standby and query-only instances +----------------------------------------------------------------- +The best option to run either standby or query-only instance is using +PostgreSQL replication. All persistent IRRD data is stored in the +PostgreSQL database, and will therefore be included. +PostgreSQL replication will also ensure all journal entries and +serials remain the same after a switch. +:doc:`Suppressed objects `, e.g. by RPKI +validation, are included in the replication as well. + +There are several important requirements for this setup: + +* The standby must run a PostgreSQL streaming replication from the + active instance. Logical replication is not supported. +* The PostgreSQL configurations must have ``track_commit_timestamp`` + enabled. +* On the standby, you run the IRRD instance with the ``database_readonly`` + and ``standby`` parameters set. +* The standby instance must use its own Redis instance. Do not use + Redis replication. +* It is recommended that all PostgreSQL instances only host the IRRd + database. Streaming replication will always include all databases, + and commits received on the standby in any database will trigger + a local preloaded data refresh. + +As replication replicates the entire database, any IRR registries +mirrored on the active instance, are also mirrored on the standby, +through the PostgreSQL replication process. + +Consistency in object suppression settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you query IRRD's configuration on a standby, e.g. with the ``!J`` +query, it will reflect the local configuration regarding +:doc:`object suppression settings `. +However, the standby does not use these settings: its database is +read only, and instead the suppression is applied by the active +instance and then replicated. + +For consistency in this query output, and reduced risk of configuration +inconsistencies after promoting a standby, you are encouraged to keep +the object suppression settings identical on all instances, even +if some are (currently) not used. + +Promoting a standby instance to active +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The general plan for promoting an IRRDv4 instance is: +* Hold all update emails. +* Ensure PostgreSQL replication is up to date. +* Promote the PostgreSQL replica to become a main server. +* Disable the ``database_readonly`` and ``standby`` settings in IRRd. +* Make sure your IRRD configuration on the standby is up to date + compared to the old active (ideally, manage this continuously). +* Set the authoritative sources to ``authoritative: true`` in the config + of the promoted instance. +* Start the IRRd instance. +* Redirect queries to the new instance. +* Run the ``irrd_load_pgp_keys`` command to load all PGP keys from + authoritative sources into the local keychain, allowing them to be used + for authentication. +* Redirect update emails to the new instance. +* Ensure published exports are now taken from the new instance. 
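One way to sanity-check the "Ensure PostgreSQL replication is up to date" step in the checklist above (an illustration only, not a procedure this patch prescribes) is to compare the last committed transaction timestamp that the ``timestamp_last_committed_transaction()`` helper added earlier in this series reports on the active instance and on the standby; once writes are held and replication has caught up, both should print the same value. The configuration path below is a placeholder::

    # Hypothetical check, run separately against the active and the standby.
    from irrd.conf import config_init
    from irrd.storage.database_handler import DatabaseHandler

    config_init("/etc/irrd.yaml")  # placeholder path to this instance's config

    dh = DatabaseHandler()
    try:
        # Same query the standby's transaction-time preload signaller uses.
        print(dh.timestamp_last_committed_transaction())
    finally:
        dh.close()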
-Option 1: using exports and NRTM for migrations and standby instances ---------------------------------------------------------------------- -The first option is to use the same :doc:`mirroring ` -features as any other kind of IRR data mirroring. This means using the files -placed in ``sources.{name}.export_destination`` by the active instance -as the ``sources.{name}.import_source`` for the standby instances, -and having standby's follow the active NRTM stream. -If you are migrating from a legacy version of IRRd, this is most likely your -only option. +.. warning:: + If users use IRRD internal authentication, by logging in through + the web interface, ensure you use a consistent URL where you + direct to the current active instance by DNS records. WebAuthn + tokens are tied to the URL as seen by the browser, and will + become unusable after a URL change. + +Upgrading IRRD +~~~~~~~~~~~~~~ +When upgrading your IRRD instances, first upgrade the active instance, +then the standby instances. If you need to run ``irrd_database_upgrade`` +as part of the upgrade, only do so on the active instance. + +You are encouraged to test upgrades yourself before applying them +in production. + +Increased preload data refresh on standby instances +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There is one inefficiency in the replication process: like an active +instance, a standby instance will keep certain data in memory and/or +Redis for performance reasons. This data needs to be refreshed if +certain data changes in the SQL database. + +On an active instance, the preloaded data is refreshed only when +relevant RPSL objects have changed. On a replica, this information +is not available. Therefore, standby instances refresh this data +after any change to the SQL database. Therefore, you may see more +load on the preload process than is typical on an active instance. +Refreshes are batched, so only a single one will run at a time. + + +Migration from legacy IRRD +-------------------------- +To migrate from a legacy IRRD version, you can use the same +:doc:`mirroring ` features as any other kind of IRR +data mirroring. In addition to usual mirroring, you should enable +``strict_import_keycert_objects`` for the source. This is a bit different from "regular" mirroring, where the mirror is never meant to be promoted to an active instance, and instances may be run by entirely @@ -78,7 +174,6 @@ regular mirror for other registries. multiple objects in your file with the same key, IRRd will only import the last one. - Object validation ~~~~~~~~~~~~~~~~~ Mirrored sources use @@ -97,64 +192,9 @@ even in non-strict mode. These objects are logged. **While running IRRd 4 as a mirror, you should check the logs for any such objects - they will disappear when you make IRRd 4 your authoritative instance.** -GPG keychain imports -~~~~~~~~~~~~~~~~~~~~ -In short: standby instances should have ``strict_import_keycert_objects`` -enabled. - -IRRd uses GnuPG to validate PGP signatures used to authenticate authoritative -changes. This means that all `key-cert` objects need to be inserted into the -GnuPG keychain before users can submit PGP signed updates. - -By default, IRRd only inserts public PGP keys from `key-cert` objects for -authoritative sources - as there is no reason to do PGP signature validation -for non-authoritative sources. However, a standby source needs to have these -keys imported already to become active later. 
This can be enabled with the -``strict_import_keycert_objects`` setting on the mirror configuration. -When enabled, `key-cert` objects always use the strict importer which includes -importing into the key chain, which allows them to be used for authentication -in the future. - -If your IRRd instance already has (or may have) `key-cert` objects that were -imported without ``strict_import_keycert_objects``, you can insert them into the -local keychain with the ``irrd_load_pgp_keys`` command. - -The ``irrd_load_pgp_keys`` command may fail to import certain keys if they use -an unsupported format. It is safe to run multiple times, even if some or all -keys are already in the keychain, and safe to run while IRRd is running. - -Password hashes -~~~~~~~~~~~~~~~ -Password authentication depends on password hashes in `mntner` objects. -To improve security, these password hashes are not included in exports or -NRTM streams for regular mirrors in IRRDv4. - -However, when an IRRd mirror is a standby -instance that may need to take an active role later, it needs all password -hashes. To support this, you need to configure a special mirroring process -on the current active instance: - -* Set ``sources.{name}.export_destination_unfiltered`` to a path where IRRd - will store exports that include full password hashes. Other than including - full hashes, this works the same as ``sources.{name}.export_destination``. - Then, distribute those files to your standby instance, and point - ``import_source`` to their location. -* Set ``sources.{name}.nrtm_access_list_unfiltered`` to an access list defined - in the configuration file. Any IP on this access list will receive - full password hashes when doing NRTM requests. Other than that, NRTM works - identical to filtered queries. Set this to the IPs of your standby instances. - -On the standby instance, you do not need any specific configuration. -However, if you used previously imported `mntner` objects without full hashes -on the standby, you need to do a full reload of the data on the standby to -ensure it has full hashes for all objects. - -If you are migrating from a different IRR server, check that password -hashes are not filtered. - Serials ~~~~~~~ -Each IRRd instance potentially creates its own set of NRTM serials when +Each instance potentially creates its own set of NRTM serials when importing changes over NRTM. This means that when switching to a different instance, mirrors would have to refresh their data. @@ -172,30 +212,11 @@ For further details, see the Without synchronised serials, the RPSL export, CURRENTSERIAL file, and NRTM feed used by a mirror must all come from the same source instance. -RPKI and scope filter -~~~~~~~~~~~~~~~~~~~~~ -:doc:`RPKI-aware mode ` and the -:doc:`scope filter ` make invalid or out of scope -objects invisible locally. These are not included in any exports, and if -an existing object becomes invalid or out of scope, a deletion is added -to the NRTM journal. - -IRRd retains invalid or out of scope objects, and they may become visible again -if their status is changed by a configuration or ROA change. -However, a standby or query-only instance using exports and NRTM will never see -objects that are invalid or out of scope on the active instance, as they are -not included in mirroring. -Upon promoting a standby instance to an active instance, these -objects are lost permanently. 
- -For the same reasons, standby and query-only instances that receive their -data over NRTM can not be queried for RPKI invalid or out of scope objects, -as they never see these objects. - -Promoting a standby to the active instance -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you use IRR mirroring with exports and NRTM, the general plan for promoting -an IRRDv4 instance would be: + +Promoting a IRRD mirror of legacy IRRD to active +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you use IRR mirroring with exports and NRTM, the general plan for switching +from a legacy IRRD to a new IRRDv4 instance would be: * Hold all update emails. * Ensure an NRTM update has run so that the instances are in sync @@ -210,61 +231,29 @@ an IRRDv4 instance would be: * If you were not using synchronised serials, all instances mirroring from your instance, must reload their local copy. -If this is part of a planned migration from a previous version, it is -recommended that you test existing tools and queries against the new IRRDv4 -instance before promoting it to be active. - - -Option 2: PostgreSQL replication -------------------------------------------- +It is recommended that you test existing tools and queries against the +new IRRDv4 instance before promoting it to be active. -.. danger:: - Since adding this section, an issue was discovered with using PostgreSQL - replication: the `local preload store may not be updated`_ causing - potential stale responses to queries. - .. _local preload store may not be updated: https://github.com/irrdnet/irrd/issues/656 - -Except for configuration, IRRd stores all its data in the PostgreSQL database. -Redis is used for passing derived data and commands. - -You could run two IRRd instances, each on their own PostgreSQL instance, which -use PostgreSQL replication as the synchronisation mechanism. In the standby -IRRd, configure the instance as ``database_readonly`` to prevent local changes. -Note that this prevents the IRRd instance from making any changes of any kind -to the local database. - -For Redis, you need to connect all instances to the same Redis instance, -or use `Redis replication`_. +Background: GPG keychain imports +-------------------------------- +IRRd uses GnuPG to validate PGP signatures used to authenticate authoritative +changes. This means that all `key-cert` objects need to be inserted into the +GnuPG keychain before users can submit PGP signed updates. -Using PostgreSQL replication solves some of the issues mentioned for other -options, but may have other limitations or issues that are out of scope -for IRRd itself. +By default, IRRd only inserts public PGP keys from `key-cert` objects for +authoritative sources - as there is no reason to do PGP signature validation +for non-authoritative sources. However, a standby source needs to have these +keys imported already to become active later. This can be enabled with the +``strict_import_keycert_objects`` setting on the mirror configuration. +When enabled, `key-cert` objects always use the strict importer which includes +importing into the key chain, which allows them to be used for authentication +in the future. -.. _Redis replication: https://redis.io/topics/replication +If your IRRd instance already has (or may have) `key-cert` objects that were +imported without ``strict_import_keycert_objects``, you can insert them into the +local keychain with the ``irrd_load_pgp_keys`` command. 
-GPG keychain imports with PostgreSQL replication -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When you use PostgreSQL replication, the same issue occurs with the GPG -keychain as with NRTM: in order to authenticate updates to authoritative -changes, the PGP keys need to be loaded into the local keychain, which does -not happen for mirrors. - -When using PostgreSQL replication, IRRd is not aware of how the objects in the -database are being changed. Therefore, you need to run the -``irrd_load_pgp_keys`` command before making a standby instance the active -instance to make sure PGP authentication keeps working. - - -Option 3: rebuilding from a periodic SQL dump ---------------------------------------------- -You can make a SQL dump of the PostgreSQL database and load it on another IRRd -instance. This is one of the simplest methods. However, it has one significant -danger: if changes happened in the old active instance, after the dump was made, -the dump is loaded into a new instance, which is then promoted to active, the -changes are not in the dump. This is expected. Worse is that new -changes made in the new active instance will reuse the same serials, and may -not be picked up by NRTM mirrors unless they refresh their copy. - -The same concerns for the GPG keychain with PostgreSQL replication apply -to this method as well. +The ``irrd_load_pgp_keys`` command may fail to import certain keys if they use +an unsupported format. It is safe to run multiple times, even if some or all +keys are already in the keychain, and safe to run while IRRd is running. diff --git a/docs/admins/configuration.rst b/docs/admins/configuration.rst index 58558d528..5b0cf672f 100644 --- a/docs/admins/configuration.rst +++ b/docs/admins/configuration.rst @@ -205,6 +205,13 @@ General settings ``import_source`` or ``nrtm_host`` set. |br| **Default**: ``false``. |br| **Change takes effect**: after full IRRd restart. +* ``standby``: a boolean for whether this instance is + in standby mode. See + :doc:`availability with PostgreSQL replication ` + for further details. Requires ``database_readonly`` to be set. + **Do not enable this setting without reading the further documentation on standby setups.** + |br| **Default**: ``false``. + |br| **Change takes effect**: after full IRRd restart. * ``redis_url``: a URL to a Redis instance, e.g. ``unix:///var/run/redis.sock`` to connect through a unix socket, or ``redis://localhost`` to connect through TCP. @@ -664,6 +671,7 @@ Sources Sharing password hashes externally is a security risk, the unfiltered data is intended only to support :doc:`availability and data migration `. + **This setting is deprecated and will be removed in IRRD 4.5.** |br| **Default**: not defined, no exports made. |br| **Change takes effect**: after SIGHUP, at the next ``export_timer``. * ``sources.{name}.export_timer``: the time between two full exports of all @@ -686,6 +694,8 @@ Sources Unfiltered means full password hashes are included. Sharing password hashes externally is a security risk, the unfiltered data is intended only to support + :doc:`availability and data migration `. + **This setting is deprecated and will be removed in IRRD 4.5.** |br| **Default**: not defined, all access denied. Clients in ``nrtm_access_list``, if defined, have filtered access. |br| **Change takes effect**: after SIGHUP, upon next request. 
diff --git a/irrd/conf/__init__.py b/irrd/conf/__init__.py index 325e01e9e..cc283cf4d 100644 --- a/irrd/conf/__init__.py +++ b/irrd/conf/__init__.py @@ -268,6 +268,9 @@ def _validate_subconfig(key, value): config.get("server.http.status_access_list"), } + if config.get("standby") and not config.get("database_readonly"): + errors.append("Setting standby can only be set combined with database_readonly.") + if not self._check_is_str(config, "email.from") or "@" not in config.get("email.from", ""): errors.append("Setting email.from is required and must be an email address.") if not self._check_is_str(config, "email.smtp"): diff --git a/irrd/conf/test_conf.py b/irrd/conf/test_conf.py index ea809909a..1cfd1387b 100644 --- a/irrd/conf/test_conf.py +++ b/irrd/conf/test_conf.py @@ -233,11 +233,11 @@ def test_load_valid_reload_invalid_config(self, save_yaml_config, tmpdir, caplog } ) - save_yaml_config({}, run_init=False) + save_yaml_config({"irrd": {"standby": True}}, run_init=False) os.kill(os.getpid(), signal.SIGHUP) assert list(get_setting("sources_default")) == ["TESTDB2", "TESTDB", "RPKI"] assert "Errors found in configuration, continuing with current settings" in caplog.text - assert 'Could not find root item "irrd"' in caplog.text + assert 'Setting standby can only be set' in caplog.text def test_load_invalid_config(self, save_yaml_config, tmpdir): config = { From f6cbdab2a180197c87a009984f23c7f22230c5c9 Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Tue, 11 Jul 2023 22:15:20 +0200 Subject: [PATCH 05/10] lint --- irrd/conf/test_conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/irrd/conf/test_conf.py b/irrd/conf/test_conf.py index 1cfd1387b..087dc941d 100644 --- a/irrd/conf/test_conf.py +++ b/irrd/conf/test_conf.py @@ -237,7 +237,7 @@ def test_load_valid_reload_invalid_config(self, save_yaml_config, tmpdir, caplog os.kill(os.getpid(), signal.SIGHUP) assert list(get_setting("sources_default")) == ["TESTDB2", "TESTDB", "RPKI"] assert "Errors found in configuration, continuing with current settings" in caplog.text - assert 'Setting standby can only be set' in caplog.text + assert "Setting standby can only be set" in caplog.text def test_load_invalid_config(self, save_yaml_config, tmpdir): config = { From 35471c3e6e489f8dc3fcbfe2dd83e2257275efce Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Wed, 12 Jul 2023 11:10:12 +0200 Subject: [PATCH 06/10] doc updates --- docs/admins/availability-and-migration.rst | 121 ++++++++++++++++----- 1 file changed, 93 insertions(+), 28 deletions(-) diff --git a/docs/admins/availability-and-migration.rst b/docs/admins/availability-and-migration.rst index 82126460a..a5d78747a 100644 --- a/docs/admins/availability-and-migration.rst +++ b/docs/admins/availability-and-migration.rst @@ -30,6 +30,8 @@ IRRd instances in a number of cases: * You are using one IRRd instance as the active instance, and would like to have a second on standby to promote to the active instance with the most recent data. +* You have a large volume of queries and want to distribute load over + multiple instances. This document mainly discusses three kinds of IRRd instances: @@ -64,13 +66,14 @@ PostgreSQL database, and will therefore be included. PostgreSQL replication will also ensure all journal entries and serials remain the same after a switch. :doc:`Suppressed objects `, e.g. by RPKI -validation, are included in the replication as well. +validation, and suspended objects, +are correctly included in the replication as well. 
There are several important requirements for this setup: * The standby must run a PostgreSQL streaming replication from the active instance. Logical replication is not supported. -* The PostgreSQL configurations must have ``track_commit_timestamp`` +* The PostgreSQL configuration must have ``track_commit_timestamp`` enabled. * On the standby, you run the IRRD instance with the ``database_readonly`` and ``standby`` parameters set. @@ -109,8 +112,9 @@ The general plan for promoting an IRRDv4 instance is: * Disable the ``database_readonly`` and ``standby`` settings in IRRd. * Make sure your IRRD configuration on the standby is up to date compared to the old active (ideally, manage this continuously). -* Set the authoritative sources to ``authoritative: true`` in the config - of the promoted instance. + Make sure the ``authoritative`` setting is enabled on your authoritative + source, and mirroring settings for any mirrored sources, e.g. + ``nrtm_host`` are correct. * Start the IRRd instance. * Redirect queries to the new instance. * Run the ``irrd_load_pgp_keys`` command to load all PGP keys from @@ -121,22 +125,29 @@ The general plan for promoting an IRRDv4 instance is: .. warning:: If users use IRRD internal authentication, by logging in through - the web interface, ensure you use a consistent URL where you + the web interface, ensure you have a consistent URL, i.e. direct to the current active instance by DNS records. WebAuthn tokens are tied to the URL as seen by the browser, and will - become unusable after a URL change. + become unusable if you change the URL. Upgrading IRRD ~~~~~~~~~~~~~~ When upgrading your IRRD instances, first upgrade the active instance, then the standby instances. If you need to run ``irrd_database_upgrade`` -as part of the upgrade, only do so on the active instance. +as part of the upgrade, only do so on the active instance. PostgreSQL +replication will include the schema changes and update standby +databases. -You are encouraged to test upgrades yourself before applying them +.. note:: + During the time between the database upgrade and upgrading the IRRD + version on a standby instance, queries on the standby instance may fail. + This depends on the exact changes between versions. + +You are encouraged to always test upgrades yourself before applying them in production. -Increased preload data refresh on standby instances -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Preload data refresh on standby instances +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There is one inefficiency in the replication process: like an active instance, a standby instance will keep certain data in memory and/or Redis for performance reasons. This data needs to be refreshed if @@ -149,6 +160,52 @@ after any change to the SQL database. Therefore, you may see more load on the preload process than is typical on an active instance. Refreshes are batched, so only a single one will run at a time. +Due to small differences in the timing of the preload process, +there may be an additional delay in updating responses to some +queries on the standby compared to the active instance, in the +order of 15-60 seconds. +This concerns the whois queries ``!g``, ``!6``, ``!a`` and in some cases ``!i``, +and the GraphQL queries ``asnPrefixes`` and ``asSetPrefixes``. 
+ + +Query-only instances using NRTM +------------------------------- +If you want to distribute the query load, but will never promote the +secondaries to active instances, you can use the PostgreSQL replication +method described above, or NRTM mirroring. +Consider carefully whether you really only need a query-only +instance, or may need to use it as a standby instance later. Promoting +an NRTM query-only instance to an active instance is unsupported. + +When others mirror from your instance using NRTM, you need to be aware +of serial synchronisation. There are two options: + +* Direct all NRTM queries to your active instance. Publish the RPSL export + and CURRENTSERIAL file from that instance. +* Use synchronised serials, allowing NRTM queries to be sent to any query-only + instance. Publish the RPSL export and CURRENTSERIAL file from the active + instance. + +For further details, see the +:ref:`NRTM serial handling documentation `. + +.. warning:: + When **not** using synchronised serials, NRTM users must never be switched + (e.g. by DNS changes or load balancers) to different instances, without + reloading their local copy. Otherwise they may silently lose updates. + + +Loading from a PostgreSQL backup +-------------------------------- +You can initialise an IRRD instance from a database backup, either as +part of a recovery or a planned migration. Key steps: + +* If the backup was made with an older IRRD version, run + ``irrd_database_upgrade`` to upgrade the schema. +* Run the ``irrd_load_pgp_keys`` command to load all PGP keys from + authoritative sources into the local keychain, allowing them to be used + for authentication. + Migration from legacy IRRD -------------------------- @@ -199,22 +256,8 @@ importing changes over NRTM. This means that when switching to a different instance, mirrors would have to refresh their data. -IRRd can run a mirror in synchronised serial mode. This is used by some -deployments to spread their query load over multiple read-only instances. -For further details, see the -:ref:`NRTM serial handling documentation `. - -.. warning:: - When not using synchronised serials, NRTM users must never be switched - (e.g. by DNS changes or load balancers) to different instances, without - reloading their local copy. Otherwise they may silently lose updates. - - Without synchronised serials, the RPSL export, CURRENTSERIAL file, and NRTM - feed used by a mirror must all come from the same source instance. - - -Promoting a IRRD mirror of legacy IRRD to active -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Promoting a IRRD mirror of a legacy instance to active +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you use IRR mirroring with exports and NRTM, the general plan for switching from a legacy IRRD to a new IRRDv4 instance would be: @@ -235,8 +278,11 @@ It is recommended that you test existing tools and queries against the new IRRDv4 instance before promoting it to be active. -Background: GPG keychain imports --------------------------------- +Background and design considerations +------------------------------------ + +GPG keychain imports +~~~~~~~~~~~~~~~~~~~~ IRRd uses GnuPG to validate PGP signatures used to authenticate authoritative changes. This means that all `key-cert` objects need to be inserted into the GnuPG keychain before users can submit PGP signed updates. @@ -257,3 +303,22 @@ local keychain with the ``irrd_load_pgp_keys`` command. The ``irrd_load_pgp_keys`` command may fail to import certain keys if they use an unsupported format. 
It is safe to run multiple times, even if some or all keys are already in the keychain, and safe to run while IRRd is running. + +Suppressed objects +~~~~~~~~~~~~~~~~~~ +:doc:`Suppressed objects ` are invisible +to normal queries and to the NRTM feed, but not deleted. They may +become visible again at any point in the future, e.g. by someone +creating a ROA or a change in another object. + +Suppressed objects are included in the PostgreSQL database, but not +in any RPSL exports. Therefore, the RPSL exports can not be used +as a full copy of the database. Otherwise all suppressed objects +would be lost upon promotion of a standby instance, which has +seemingly no effect if they remain suppressed, but also means they +can not become visible later. + +In a PostgreSQL replication setup, only the active instance will run +the object suppression tasks. Standby instances replicate the state +of the database including suppression status and e.g. the ROA +table. From a623708100cbd160e1eb9d7ad6c4fc6d81a0b56e Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Wed, 12 Jul 2023 11:19:39 +0200 Subject: [PATCH 07/10] doc update --- docs/admins/deployment.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/admins/deployment.rst b/docs/admins/deployment.rst index da8aa4680..5d40e365e 100644 --- a/docs/admins/deployment.rst +++ b/docs/admins/deployment.rst @@ -99,8 +99,6 @@ size of the RPSL text imported. The PostgreSQL database is the only source of IRRd's data. This means you need to run regular backups of the database. - It is also possible to restore data from recent exports, - but changes made since the most recent export will be lost. .. _deployment-redis-configuration: From 7e55f6beaaf86de920d2e6f20ec44df9e3de7f59 Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Wed, 12 Jul 2023 11:35:29 +0200 Subject: [PATCH 08/10] preload during ro --- irrd/daemon/main.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/irrd/daemon/main.py b/irrd/daemon/main.py index 28ca02a76..81cd4844a 100755 --- a/irrd/daemon/main.py +++ b/irrd/daemon/main.py @@ -142,10 +142,8 @@ def run_irrd(mirror_frequency: int, config_file_path: str, uid: Optional[int], g mirror_scheduler = MirrorScheduler() - preload_manager = None - if not get_setting(f"database_readonly"): - preload_manager = PreloadStoreManager(name="irrd-preload-store-manager") - preload_manager.start() + preload_manager = PreloadStoreManager(name="irrd-preload-store-manager") + preload_manager.start() uvicorn_process = ExceptionLoggingProcess( target=run_http_server, name="irrd-http-server-listener", args=(config_file_path,) From ef87bf6baf00ebd71feb02c3ad7fe60b9526f137 Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Wed, 12 Jul 2023 15:47:50 +0200 Subject: [PATCH 09/10] add note about settings consistency --- docs/admins/availability-and-migration.rst | 16 ++++++++++++++++ irrd/mirroring/jobs.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/admins/availability-and-migration.rst b/docs/admins/availability-and-migration.rst index a5d78747a..64b4a1a16 100644 --- a/docs/admins/availability-and-migration.rst +++ b/docs/admins/availability-and-migration.rst @@ -79,6 +79,15 @@ There are several important requirements for this setup: and ``standby`` parameters set. * The standby instance must use its own Redis instance. Do not use Redis replication. +* ``rpki.roa_source`` must be consistent between active and standby + configurations. 
+* You are recommended to keep other settings, like ``scopefilter``, + ``sources.{name}.route_object_preference``, + ``sources.{name}.object_class_filter`` consistent between active + and standby. Note that you can not set + ``sources.{name}.authoritative``, ``sources.{name}.nrtm_host``, or + ``sources.{name}.import_source`` on a standby instance, as these + conflict with ``database_readonly``. * It is recommended that all PostgreSQL instances only host the IRRd database. Streaming replication will always include all databases, and commits received on the standby in any database will trigger @@ -102,6 +111,10 @@ inconsistencies after promoting a standby, you are encouraged to keep the object suppression settings identical on all instances, even if some are (currently) not used. +For RPKI, ``rpki.roa_source`` must be consistent between active and +standby, because that setting determines whether the query parser +considers ``RPKI`` a valid source. + Promoting a standby instance to active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The general plan for promoting an IRRDv4 instance is: @@ -122,6 +135,9 @@ The general plan for promoting an IRRDv4 instance is: for authentication. * Redirect update emails to the new instance. * Ensure published exports are now taken from the new instance. +* Check the mirroring status to ensure the new active instance + has access to all exports and NRTM streams (some other operators + restrict NRTM access to certain IPs). .. warning:: If users use IRRD internal authentication, by logging in through diff --git a/irrd/mirroring/jobs.py b/irrd/mirroring/jobs.py index 034ee022c..582597f46 100644 --- a/irrd/mirroring/jobs.py +++ b/irrd/mirroring/jobs.py @@ -13,7 +13,7 @@ class TransactionTimePreloadSignaller: Signal a preload based on the last transaction time. 
""" - last_time = Optional[datetime] + last_time: Optional[datetime] = None def run(self): self.database_handler = DatabaseHandler() @@ -22,13 +22,13 @@ def run(self): try: current_time = self.database_handler.timestamp_last_committed_transaction() if not self.last_time or self.last_time != current_time: - self.preloader.signal_reload() logger.debug( ( f"Signalling preload reload: last transaction completed {current_time}, previous" f" known last transaction was {self.last_time}" ), ) + self.preloader.signal_reload() self.last_time = current_time except Exception as exc: logger.error( From e2964dc0f5523c0460a5f144909baabbc1d82d7a Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Thu, 13 Jul 2023 11:02:13 +0200 Subject: [PATCH 10/10] flatten settings --- docs/admins/availability-and-migration.rst | 14 +++++++---- docs/admins/configuration.rst | 16 ++++--------- irrd/conf/__init__.py | 7 ++---- irrd/conf/known_keys.py | 3 +-- irrd/conf/test_conf.py | 10 ++++---- irrd/mirroring/scheduler.py | 6 ++--- irrd/mirroring/tests/test_scheduler.py | 27 ++++------------------ irrd/scripts/expire_journal.py | 4 ++-- irrd/scripts/irrd_control.py | 10 ++++---- irrd/scripts/load_database.py | 4 ++-- irrd/scripts/mirror_force_reload.py | 4 ++-- irrd/scripts/rpsl_read.py | 4 ++-- irrd/scripts/set_last_modified_auth.py | 4 ++-- irrd/scripts/tests/test_irrd_control.py | 12 +++++----- irrd/scripts/update_database.py | 4 ++-- irrd/storage/database_handler.py | 4 ++-- irrd/storage/preload.py | 2 +- irrd/storage/tests/test_database.py | 2 +- 18 files changed, 56 insertions(+), 81 deletions(-) diff --git a/docs/admins/availability-and-migration.rst b/docs/admins/availability-and-migration.rst index 64b4a1a16..7e49de73b 100644 --- a/docs/admins/availability-and-migration.rst +++ b/docs/admins/availability-and-migration.rst @@ -74,9 +74,9 @@ There are several important requirements for this setup: * The standby must run a PostgreSQL streaming replication from the active instance. Logical replication is not supported. * The PostgreSQL configuration must have ``track_commit_timestamp`` - enabled. -* On the standby, you run the IRRD instance with the ``database_readonly`` - and ``standby`` parameters set. + and ``hot_standby_feedback`` enabled. +* On the standby, you run the IRRD instance with the ``readonly_standby`` + parameters set. * The standby instance must use its own Redis instance. Do not use Redis replication. * ``rpki.roa_source`` must be consistent between active and standby @@ -87,11 +87,15 @@ There are several important requirements for this setup: and standby. Note that you can not set ``sources.{name}.authoritative``, ``sources.{name}.nrtm_host``, or ``sources.{name}.import_source`` on a standby instance, as these - conflict with ``database_readonly``. + conflict with ``readonly_standby``. * It is recommended that all PostgreSQL instances only host the IRRd database. Streaming replication will always include all databases, and commits received on the standby in any database will trigger a local preloaded data refresh. +* Although the details of PostgreSQL are out of scope for + this documentation, the use of replication slots is recommended. + Make sure to drop a replication slot if you decommission a + standby server, to prevent infinite PostgreSQL WAL growth. As replication replicates the entire database, any IRR registries mirrored on the active instance, are also mirrored on the standby, @@ -122,7 +126,7 @@ The general plan for promoting an IRRDv4 instance is: * Hold all update emails. 
* Ensure PostgreSQL replication is up to date. * Promote the PostgreSQL replica to become a main server. -* Disable the ``database_readonly`` and ``standby`` settings in IRRd. +* Disable the ``readonly_standby`` setting in IRRd. * Make sure your IRRD configuration on the standby is up to date compared to the old active (ideally, manage this continuously). Make sure the ``authoritative`` setting is enabled on your authoritative diff --git a/docs/admins/configuration.rst b/docs/admins/configuration.rst index 5b0cf672f..082fb9f2f 100644 --- a/docs/admins/configuration.rst +++ b/docs/admins/configuration.rst @@ -196,19 +196,11 @@ General settings for improved performance |br| **Default**: not defined, but required. |br| **Change takes effect**: after full IRRd restart. -* ``database_readonly``: a boolean for whether this instance is - database read only, i.e. IRRd will never write any changes to the SQL database - in any circumstance. This can be used for - :doc:`availability with PostgreSQL replication `. - This setting means that this IRRd instance will never run the RPKI or scope - filter validators, and can not be used if any source has ``authoritative``, - ``import_source`` or ``nrtm_host`` set. - |br| **Default**: ``false``. - |br| **Change takes effect**: after full IRRd restart. -* ``standby``: a boolean for whether this instance is - in standby mode. See +* ``readonly_standby``: a boolean for whether this instance is + in read-only standby mode. See :doc:`availability with PostgreSQL replication ` - for further details. Requires ``database_readonly`` to be set. + for further details. Can not be used if any source has ``authoritative``, + ``import_source`` or ``nrtm_host`` set. **Do not enable this setting without reading the further documentation on standby setups.** |br| **Default**: ``false``. |br| **Change takes effect**: after full IRRd restart. diff --git a/irrd/conf/__init__.py b/irrd/conf/__init__.py index cc283cf4d..c6be01951 100644 --- a/irrd/conf/__init__.py +++ b/irrd/conf/__init__.py @@ -268,9 +268,6 @@ def _validate_subconfig(key, value): config.get("server.http.status_access_list"), } - if config.get("standby") and not config.get("database_readonly"): - errors.append("Setting standby can only be set combined with database_readonly.") - if not self._check_is_str(config, "email.from") or "@" not in config.get("email.from", ""): errors.append("Setting email.from is required and must be an email address.") if not self._check_is_str(config, "email.smtp"): @@ -399,12 +396,12 @@ def _validate_subconfig(key, value): "nrtm_host or import_source are set." ) - if config.get("database_readonly") and ( + if config.get("readonly_standby") and ( details.get("authoritative") or details.get("nrtm_host") or details.get("import_source") ): errors.append( f"Source {name} can not have authoritative, import_source or nrtm_host set " - "when database_readonly is enabled." + "when readonly_standby is enabled." 
) number_fields = [ diff --git a/irrd/conf/known_keys.py b/irrd/conf/known_keys.py index f3e515719..87476ec96 100644 --- a/irrd/conf/known_keys.py +++ b/irrd/conf/known_keys.py @@ -8,8 +8,7 @@ KNOWN_CONFIG_KEYS = DottedDict( { "database_url": {}, - "database_readonly": {}, - "standby": {}, + "readonly_standby": {}, "redis_url": {}, "piddir": {}, "user": {}, diff --git a/irrd/conf/test_conf.py b/irrd/conf/test_conf.py index 087dc941d..54e39801b 100644 --- a/irrd/conf/test_conf.py +++ b/irrd/conf/test_conf.py @@ -233,16 +233,16 @@ def test_load_valid_reload_invalid_config(self, save_yaml_config, tmpdir, caplog } ) - save_yaml_config({"irrd": {"standby": True}}, run_init=False) + save_yaml_config({}, run_init=False) os.kill(os.getpid(), signal.SIGHUP) assert list(get_setting("sources_default")) == ["TESTDB2", "TESTDB", "RPKI"] assert "Errors found in configuration, continuing with current settings" in caplog.text - assert "Setting standby can only be set" in caplog.text + assert 'Could not find root item "irrd"' in caplog.text def test_load_invalid_config(self, save_yaml_config, tmpdir): config = { "irrd": { - "database_readonly": True, + "readonly_standby": True, "piddir": str(tmpdir + "/does-not-exist"), "user": "a", "secret_key": "sssssssssssss", @@ -379,12 +379,12 @@ def test_load_invalid_config(self, save_yaml_config, tmpdir): in str(ce.value) ) assert ( - "Source TESTDB can not have authoritative, import_source or nrtm_host set when database_readonly" + "Source TESTDB can not have authoritative, import_source or nrtm_host set when readonly_standby" " is enabled." in str(ce.value) ) assert ( - "Source TESTDB3 can not have authoritative, import_source or nrtm_host set when database_readonly" + "Source TESTDB3 can not have authoritative, import_source or nrtm_host set when readonly_standby" " is enabled." in str(ce.value) ) diff --git a/irrd/mirroring/scheduler.py b/irrd/mirroring/scheduler.py index f6256118f..732f520fb 100644 --- a/irrd/mirroring/scheduler.py +++ b/irrd/mirroring/scheduler.py @@ -75,13 +75,13 @@ def __init__(self, *args, **kwargs): self.previous_scopefilter_prefixes = None self.previous_scopefilter_asns = None self.previous_scopefilter_excluded = None + # This signaller is special in that it does not run in a separate + # process and keeps state in the instance. 
self.transaction_time_preload_signaller = TransactionTimePreloadSignaller() def run(self) -> None: - if get_setting("standby"): + if get_setting("readonly_standby"): self.transaction_time_preload_signaller.run() - - if get_setting("database_readonly"): return if get_setting("rpki.roa_source"): diff --git a/irrd/mirroring/tests/test_scheduler.py b/irrd/mirroring/tests/test_scheduler.py index acd3de6fc..156d60c92 100644 --- a/irrd/mirroring/tests/test_scheduler.py +++ b/irrd/mirroring/tests/test_scheduler.py @@ -16,11 +16,15 @@ def test_scheduler_standby_preload_signaller(self, monkeypatch, config_override) monkeypatch.setattr( "irrd.mirroring.scheduler.TransactionTimePreloadSignaller", mock_preload_signaller ) + monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) + monkeypatch.setattr("irrd.mirroring.scheduler.RPSLMirrorImportUpdateRunner", MockRunner) + global thread_run_count + thread_run_count = 0 config_override( { "standby": True, - "database_readonly": True, + "readonly_standby": True, } ) @@ -31,27 +35,6 @@ def test_scheduler_standby_preload_signaller(self, monkeypatch, config_override) scheduler.run() assert flatten_mock_calls(mock_preload_signaller) == [["run", (), {}], ["run", (), {}]] - def test_scheduler_database_readonly(self, monkeypatch, config_override): - monkeypatch.setattr("irrd.mirroring.scheduler.TransactionTimePreloadSignaller", object) - monkeypatch.setattr("irrd.mirroring.scheduler.ScheduledTaskProcess", MockScheduledTaskProcess) - global thread_run_count - thread_run_count = 0 - - config_override( - { - "database_readonly": True, - "sources": { - "TEST": { - "import_source": "url", - "import_timer": 0, - } - }, - } - ) - - monkeypatch.setattr("irrd.mirroring.scheduler.RPSLMirrorImportUpdateRunner", MockRunner) - scheduler = MirrorScheduler() - scheduler.run() assert thread_run_count == 0 def test_scheduler_runs_rpsl_import(self, monkeypatch, config_override): diff --git a/irrd/scripts/expire_journal.py b/irrd/scripts/expire_journal.py index 04cd15f49..c6c905ae0 100755 --- a/irrd/scripts/expire_journal.py +++ b/irrd/scripts/expire_journal.py @@ -83,8 +83,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) try: diff --git a/irrd/scripts/irrd_control.py b/irrd/scripts/irrd_control.py index e4b9dbf9e..55207441b 100755 --- a/irrd/scripts/irrd_control.py +++ b/irrd/scripts/irrd_control.py @@ -20,10 +20,10 @@ logger = logging.getLogger(__name__) -def check_database_readonly(f): +def check_readonly_standby(f): def new_func(*args, **kwargs): - if get_setting("database_readonly"): - raise click.ClickException("Unable to run this command, because database_readonly is set.") + if get_setting("readonly_standby"): + raise click.ClickException("Unable to run this command, because readonly_standby is set.") return f(*args, **kwargs) return update_wrapper(new_func, f) @@ -43,7 +43,7 @@ def cli(config): @cli.command() @click.argument("email") -@check_database_readonly +@check_readonly_standby @session_provider_manager_sync def user_mfa_clear(email, session_provider: ORMSessionProvider): """ @@ -92,7 +92,7 @@ def user_mfa_clear(email, session_provider: ORMSessionProvider): @cli.command() @click.argument("email") @click.option("--enable/--disable", default=True) -@check_database_readonly 
+@check_readonly_standby @session_provider_manager_sync def user_change_override(email: str, enable: bool, session_provider: ORMSessionProvider): """ diff --git a/irrd/scripts/load_database.py b/irrd/scripts/load_database.py index f4e07dcb8..1bf146442 100755 --- a/irrd/scripts/load_database.py +++ b/irrd/scripts/load_database.py @@ -72,8 +72,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) sys.exit(load(args.source, args.input_file, args.serial)) diff --git a/irrd/scripts/mirror_force_reload.py b/irrd/scripts/mirror_force_reload.py index de8ad899e..bcee9878a 100755 --- a/irrd/scripts/mirror_force_reload.py +++ b/irrd/scripts/mirror_force_reload.py @@ -36,8 +36,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) set_force_reload(args.source) diff --git a/irrd/scripts/rpsl_read.py b/irrd/scripts/rpsl_read.py index 7a31a74d1..c11e9d01b 100755 --- a/irrd/scripts/rpsl_read.py +++ b/irrd/scripts/rpsl_read.py @@ -107,8 +107,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) RPSLParse().main(args.input_file, args.strict_validation, args.database, not args.hide_info) diff --git a/irrd/scripts/set_last_modified_auth.py b/irrd/scripts/set_last_modified_auth.py index 8052d2de9..faff61411 100755 --- a/irrd/scripts/set_last_modified_auth.py +++ b/irrd/scripts/set_last_modified_auth.py @@ -57,8 +57,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) sys.exit(set_last_modified()) diff --git a/irrd/scripts/tests/test_irrd_control.py b/irrd/scripts/tests/test_irrd_control.py index c9774c414..a2cc9ec83 100644 --- a/irrd/scripts/tests/test_irrd_control.py +++ b/irrd/scripts/tests/test_irrd_control.py @@ -75,13 +75,13 @@ def test_user_does_not_exist(self, irrd_db_session_with_user, smtpd_override): assert "No user found" in result.output assert not smtpd_override.messages - def test_database_readonly(self, irrd_db_session_with_user, config_override, smtpd_override): - config_override({"database_readonly": True}) + def test_readonly_standby(self, irrd_db_session_with_user, config_override, smtpd_override): + config_override({"readonly_standby": True}) runner = CliRunner() result = runner.invoke(user_mfa_clear, ["user.email"]) assert result.exit_code == 1 - assert "database_readonly" in result.output + assert "readonly_standby" in result.output assert not smtpd_override.messages @@ -162,11 +162,11 @@ def test_no_mfa(self, irrd_db_session_with_user): assert result.exit_code == 1 assert "has no two-factor" in result.output - def test_database_readonly(self, irrd_db_session_with_user, config_override): - 
config_override({"database_readonly": True}) + def test_readonly_standby(self, irrd_db_session_with_user, config_override): + config_override({"readonly_standby": True}) session_provider, user = irrd_db_session_with_user runner = CliRunner() result = runner.invoke(user_change_override, [user.email, "--enable"], input="y") assert result.exit_code == 1 - assert "database_readonly" in result.output + assert "readonly_standby" in result.output diff --git a/irrd/scripts/update_database.py b/irrd/scripts/update_database.py index d546a81b3..dd301c360 100644 --- a/irrd/scripts/update_database.py +++ b/irrd/scripts/update_database.py @@ -64,8 +64,8 @@ def main(): # pragma: no cover args = parser.parse_args() config_init(args.config_file_path) - if get_setting("database_readonly"): - print("Unable to run, because database_readonly is set") + if get_setting("readonly_standby"): + print("Unable to run, because readonly_standby is set") sys.exit(-1) sys.exit(update(args.source, args.input_file)) diff --git a/irrd/storage/database_handler.py b/irrd/storage/database_handler.py index 454bdea49..9d2ec0a5d 100644 --- a/irrd/storage/database_handler.py +++ b/irrd/storage/database_handler.py @@ -92,10 +92,10 @@ def __init__(self, readonly=False): If readonly is True, this instance will expect read queries only. No transaction will be started, all queries will use autocommit. - Readonly is always true if database_readonly is set in the config. + Readonly is always true if readonly_standby is set in the config. """ self.status_tracker = None - if get_setting("database_readonly"): + if get_setting("readonly_standby"): self.readonly = True else: self.readonly = readonly diff --git a/irrd/storage/preload.py b/irrd/storage/preload.py index 4fab19e85..ae32c86cf 100644 --- a/irrd/storage/preload.py +++ b/irrd/storage/preload.py @@ -99,7 +99,7 @@ def __init__(self, enable_queries=True): callback=self._load_routes_into_memory, pubsub=self._pubsub, sleep_time=5, daemon=True ) self._pubsub_thread.start() - if get_setting("database_readonly"): # pragma: no cover + if get_setting("readonly_standby"): # pragma: no cover # If this instance is readonly, another IRRd process will be updating # the store, and likely has already done so, meaning we can try to load # from Redis right away instead of waiting for a signal. diff --git a/irrd/storage/tests/test_database.py b/irrd/storage/tests/test_database.py index b7ac5eb70..2b98d1be0 100644 --- a/irrd/storage/tests/test_database.py +++ b/irrd/storage/tests/test_database.py @@ -96,7 +96,7 @@ def test_readonly(self, monkeypatch, irrd_db_mock_preload, config_override): config_override( { - "database_readonly": True, + "readonly_standby": True, } )