From 14308e4e46a1fef1e21115b9deccca60ed3c3f1f Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Thu, 29 Jul 2021 15:12:25 +0200 Subject: [PATCH] Fix #525 - Reduce scope filter memory use (ref #497) (#530) --- irrd/scopefilter/tests/test_scopefilter.py | 138 ++++++++++++--------- irrd/scopefilter/validators.py | 12 +- 2 files changed, 93 insertions(+), 57 deletions(-) diff --git a/irrd/scopefilter/tests/test_scopefilter.py b/irrd/scopefilter/tests/test_scopefilter.py index 3922252d2..8808e957e 100644 --- a/irrd/scopefilter/tests/test_scopefilter.py +++ b/irrd/scopefilter/tests/test_scopefilter.py @@ -111,62 +111,83 @@ def test_validate_all_rpsl_objects(self, config_override, monkeypatch): }, }) - mock_query_result = [ - { - # Should become in_scope - 'rpsl_pk': '192.0.2.128/25,AS65547', - 'ip_first': '192.0.2.128', - 'prefix_length': 25, - 'asn_first': 65547, - 'source': 'TEST', - 'object_class': 'route', - 'object_text': 'text', - 'scopefilter_status': ScopeFilterStatus.out_scope_prefix, - }, - { - # Should become out_scope_prefix - 'rpsl_pk': '192.0.2.0/25,AS65547', - 'ip_first': '192.0.2.0', - 'prefix_length': 25, - 'asn_first': 65547, - 'source': 'TEST', - 'object_class': 'route', - 'object_text': 'text', - 'scopefilter_status': ScopeFilterStatus.in_scope, - }, - { - # Should become out_scope_as - 'rpsl_pk': '192.0.2.128/25,AS65547', - 'ip_first': '192.0.2.128', - 'prefix_length': 25, - 'asn_first': 23456, - 'source': 'TEST', - 'object_class': 'route', - 'object_text': 'text', - 'scopefilter_status': ScopeFilterStatus.out_scope_prefix, - }, - { - # Should become out_scope_as - 'rpsl_pk': 'AS65547', - 'asn_first': 23456, - 'source': 'TEST', - 'object_class': 'aut-num', - 'object_text': 'text', - 'scopefilter_status': ScopeFilterStatus.in_scope, - }, - { - # Should not change - 'rpsl_pk': '192.0.2.128/25,AS65548', - 'ip_first': '192.0.2.128', - 'prefix_length': 25, - 'asn_first': 65548, - 'source': 'TEST', - 'object_class': 'route', - 'object_text': 'text', - 'scopefilter_status': ScopeFilterStatus.in_scope, - }, - ] - mock_dh.execute_query = lambda query: mock_query_result + mock_query_result = iter([ + [ + { + # Should become in_scope + 'pk': '192.0.2.128/25,AS65547', + 'rpsl_pk': '192.0.2.128/25,AS65547', + 'ip_first': '192.0.2.128', + 'prefix_length': 25, + 'asn_first': 65547, + 'source': 'TEST', + 'object_class': 'route', + 'scopefilter_status': ScopeFilterStatus.out_scope_prefix, + }, + { + # Should become out_scope_prefix + 'pk': '192.0.2.0/25,AS65547', + 'rpsl_pk': '192.0.2.0/25,AS65547', + 'ip_first': '192.0.2.0', + 'prefix_length': 25, + 'asn_first': 65547, + 'source': 'TEST', + 'object_class': 'route', + 'scopefilter_status': ScopeFilterStatus.in_scope, + }, + { + # Should become out_scope_as + 'pk': '192.0.2.128/25,AS65547', + 'rpsl_pk': '192.0.2.128/25,AS65547', + 'ip_first': '192.0.2.128', + 'prefix_length': 25, + 'asn_first': 23456, + 'source': 'TEST', + 'object_class': 'route', + 'scopefilter_status': ScopeFilterStatus.out_scope_prefix, + }, + { + # Should become out_scope_as + 'pk': 'AS65547', + 'rpsl_pk': 'AS65547', + 'asn_first': 23456, + 'source': 'TEST', + 'object_class': 'aut-num', + 'object_text': 'text', + 'scopefilter_status': ScopeFilterStatus.in_scope, + }, + { + # Should not change + 'pk': '192.0.2.128/25,AS65548', + 'rpsl_pk': '192.0.2.128/25,AS65548', + 'ip_first': '192.0.2.128', + 'prefix_length': 25, + 'asn_first': 65548, + 'source': 'TEST', + 'object_class': 'route', + 'scopefilter_status': ScopeFilterStatus.in_scope, + }, + ], + [ + { + 'pk': '192.0.2.128/25,AS65547', + 'object_text': 'text-192.0.2.128/25,AS65547', + }, + { + 'pk': '192.0.2.0/25,AS65547', + 'object_text': 'text-192.0.2.0/25,AS65547', + }, + { + 'pk': '192.0.2.128/25,AS65547', + 'object_text': 'text-192.0.2.128/25,AS65547', + }, + { + 'pk': 'AS65547', + 'object_text': 'text-AS65547', + }, + ] + ]) + mock_dh.execute_query = lambda query: next(mock_query_result) validator = ScopeFilterValidator() result = validator.validate_all_rpsl_objects(mock_dh) @@ -178,15 +199,20 @@ def test_validate_all_rpsl_objects(self, config_override, monkeypatch): assert now_in_scope[0]['rpsl_pk'] == '192.0.2.128/25,AS65547' assert now_in_scope[0]['old_status'] == ScopeFilterStatus.out_scope_prefix + assert now_in_scope[0]['object_text'] == 'text-192.0.2.128/25,AS65547' assert now_out_scope_as[0]['rpsl_pk'] == '192.0.2.128/25,AS65547' assert now_out_scope_as[0]['old_status'] == ScopeFilterStatus.out_scope_prefix + assert now_out_scope_as[0]['object_text'] == 'text-192.0.2.128/25,AS65547' assert now_out_scope_as[1]['rpsl_pk'] == 'AS65547' assert now_out_scope_as[1]['old_status'] == ScopeFilterStatus.in_scope + assert now_out_scope_as[1]['object_text'] == 'text-AS65547' assert now_out_scope_prefix[0]['rpsl_pk'] == '192.0.2.0/25,AS65547' assert now_out_scope_prefix[0]['old_status'] == ScopeFilterStatus.in_scope + assert now_out_scope_prefix[0]['object_text'] == 'text-192.0.2.0/25,AS65547' assert flatten_mock_calls(mock_dq) == [ ['object_classes', (['route', 'route6', 'aut-num'],), {}], + ['pks', (['192.0.2.128/25,AS65547', '192.0.2.0/25,AS65547', '192.0.2.128/25,AS65547', 'AS65547'],), {}], ] diff --git a/irrd/scopefilter/validators.py b/irrd/scopefilter/validators.py index d6b496334..b682bc752 100644 --- a/irrd/scopefilter/validators.py +++ b/irrd/scopefilter/validators.py @@ -113,7 +113,7 @@ def validate_all_rpsl_objects(self, database_handler: DatabaseHandler) -> \ validation result, are not included in the return value. """ columns = ['rpsl_pk', 'ip_first', 'prefix_length', 'asn_first', 'source', 'object_class', - 'object_text', 'scopefilter_status', 'rpki_status'] + 'scopefilter_status', 'rpki_status'] objs_changed: Dict[ScopeFilterStatus, List[Dict[str, str]]] = defaultdict(list) @@ -136,6 +136,16 @@ def validate_all_rpsl_objects(self, database_handler: DatabaseHandler) -> \ if new_status != current_status: result['scopefilter_status'] = new_status objs_changed[new_status].append(result) + + # Object text is only retrieved for objects with state changes + pks_to_enrich = [obj['pk'] for objs in objs_changed.values() for obj in objs] + query = RPSLDatabaseQuery(['pk', 'object_text'], enable_ordering=False).pks(pks_to_enrich) + rows_per_pk = {row['pk']: row for row in database_handler.execute_query(query)} + + for rpsl_objs in objs_changed.values(): + for rpsl_obj in rpsl_objs: + rpsl_obj.update(rows_per_pk[rpsl_obj['pk']]) + return (objs_changed[ScopeFilterStatus.in_scope], objs_changed[ScopeFilterStatus.out_scope_as], objs_changed[ScopeFilterStatus.out_scope_prefix])