From e65fe70ebedc05d859e31921fbf53a19b3ab9ac1 Mon Sep 17 00:00:00 2001 From: Sasha Romijn Date: Mon, 2 Dec 2024 14:09:48 +0100 Subject: [PATCH] Stricter validation on non-printable characters (#978) --- docs/releases/4.5.0.rst | 1 + irrd/rpsl/fields.py | 14 ++++++++++++-- irrd/rpsl/tests/test_fields.py | 3 ++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/docs/releases/4.5.0.rst b/docs/releases/4.5.0.rst index 7bbb1af66..c9425b7fe 100644 --- a/docs/releases/4.5.0.rst +++ b/docs/releases/4.5.0.rst @@ -80,3 +80,4 @@ Other changes was added, to force an authoritative database into non-strict mode. As it's name notes, this disables many checks and all referential integrity and is therefore strongly discouraged. +* Most non-printable characters are now stripped from all RPSL attribute values. diff --git a/irrd/rpsl/fields.py b/irrd/rpsl/fields.py index 204822c73..5f50dde43 100644 --- a/irrd/rpsl/fields.py +++ b/irrd/rpsl/fields.py @@ -1,5 +1,6 @@ import datetime import re +import sys from typing import Optional from urllib.parse import urlparse @@ -18,7 +19,7 @@ # This regex is not designed to catch every possible invalid variation, # but rather meant to protect against unintentional mistakes. -# # Validate local-part @ domain | or IPv4 address | or IPv6 +# # Validate local-part @ domain | or IPv4 address | or IPv6 re_email = re.compile( r"^[A-Z0-9$!#%&\"*+\/=?^_`{|}~\\.-]+@(([A-Z0-9\\.-]+)|(\[\d+\.\d+\.\d+\.\d+\])|(\[[A-f\d:]+\]))$", re.IGNORECASE, @@ -54,6 +55,15 @@ ] reserved_prefixes = ["AS-", "RS-", "RTRS-", "FLTR-", "PRNG-"] +ALLOWED_CONTROL_CHARS = {"\n", "\r", "\t", "\u200d"} +NOPRINT_TRANS_TABLE = str.maketrans( + { + i: None + for i in range(0, sys.maxunicode + 1) + if not chr(i).isprintable() and chr(i) not in ALLOWED_CONTROL_CHARS + } +) + """ Fields for RPSL data. 
@@ -102,7 +112,7 @@ def __init__( def parse( self, value: str, messages: RPSLParserMessages, strict_validation=True ) -> Optional[RPSLFieldParseResult]: - return RPSLFieldParseResult(value) + return RPSLFieldParseResult(value.translate(NOPRINT_TRANS_TABLE)) class RPSLFieldListMixin: diff --git a/irrd/rpsl/tests/test_fields.py b/irrd/rpsl/tests/test_fields.py index eda7ba5a7..13d04527e 100644 --- a/irrd/rpsl/tests/test_fields.py +++ b/irrd/rpsl/tests/test_fields.py @@ -51,7 +51,8 @@ def assert_validation_err(expected_errors, callable, *args, **kwargs): def test_rpsl_text_field(): field = RPSLTextField() messages = RPSLParserMessages() - assert field.parse("AS-FOO$", messages).value, "AS-FOO$" + # U+200F (right-to-left mark), U+200B (zero-width space) and \x07 (BEL) must be stripped; the zero-width joiner (U+200D) inside the flag emoji is kept + assert field.parse("AS-FOO🎉🏳️‍🌈\u200f\u200b\x07$ \t", messages).value == "AS-FOO🎉🏳️‍🌈$ \t" assert not messages.errors()