From 14b4649fa24ee0d58e351c106011fb1bace4a9bc Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Tue, 24 Dec 2024 10:32:34 +0100 Subject: [PATCH] :bug: output(...) replace declarative mark using non iana compliant encoding name close #572 --- CHANGELOG.md | 5 ++++- src/charset_normalizer/models.py | 2 +- tests/test_preemptive_detection.py | 10 +++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d46598c6..f4d10b11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - `build-requirements.txt` as per using `pyproject.toml` native build configuration. - `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile). - `setup.cfg` in favor of `pyproject.toml` metadata configuration. -- unused `util.unicode_range` function. +- unused `utils.unicode_range` function. + +### Fixed +- converting content to Unicode bytes may insert non-IANA compliant encoding name (e.g. `utf_8` instead of `utf-8`). (#572) ## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08) diff --git a/src/charset_normalizer/models.py b/src/charset_normalizer/models.py index 2493fc92..1042758f 100644 --- a/src/charset_normalizer/models.py +++ b/src/charset_normalizer/models.py @@ -222,7 +222,7 @@ def output(self, encoding: str = "utf_8") -> bytes: RE_POSSIBLE_ENCODING_INDICATION, lambda m: m.string[m.span()[0] : m.span()[1]].replace( m.groups()[0], - iana_name(self._output_encoding), # type: ignore[arg-type] + iana_name(self._output_encoding).replace("_", "-"), # type: ignore[arg-type] ), decoded_string[:8192], count=1, diff --git a/tests/test_preemptive_detection.py b/tests/test_preemptive_detection.py index 64b52023..e56c4a16 100644 --- a/tests/test_preemptive_detection.py +++ b/tests/test_preemptive_detection.py @@ -34,7 +34,7 @@ def test_detect_most_common_body_encoding(payload, expected_encoding): [ ( b'', - b'', + b'', ), ( b'', @@ -51,19 +51,19 @@ def test_detect_most_common_body_encoding(payload, expected_encoding): ), ( b'', - b'', + b'', ), ( b'', - b'', + b'', ), ( b"", - b"", + b"", ), ( b'', - b'', + b'', ), ], )