Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow optional char detection dependencies in post-packaging #6702

Merged
merged 1 commit into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,23 @@ jobs:
- name: Run tests
run: |
make ci
no_chardet:
name: "No Character Detection"
runs-on: ubuntu-latest
strategy:
fail-fast: true

steps:
- uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
- name: 'Set up Python 3.8'
uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d
with:
python-version: '3.8'
- name: Install dependencies
run: |
make
python -m pip uninstall -y "charset_normalizer" "chardet"
- name: Run tests
run: |
make ci
6 changes: 5 additions & 1 deletion src/requests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,11 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
# charset_normalizer >= 2.0.0 < 4.0.0
assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0)
else:
raise Exception("You need either charset_normalizer or chardet installed")
warnings.warn(
"Unable to find acceptable character detection dependency "
"(chardet or charset_normalizer).",
RequestsDependencyWarning,
)


def _check_cryptography(cryptography_version):
Expand Down
25 changes: 20 additions & 5 deletions src/requests/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,28 @@
compatibility until the next major version.
"""

try:
import chardet
except ImportError:
import charset_normalizer as chardet

import importlib
import sys

# -------------------
# Character Detection
# -------------------


def _resolve_char_detection():
"""Find supported character detection libraries."""
chardet = None
for lib in ("chardet", "charset_normalizer"):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While this keeps the logic of the earlier code, I think that the library that is actually required should be tried first. #6714 addresses this.

if chardet is None:
try:
chardet = importlib.import_module(lib)
except ImportError:
pass
return chardet


chardet = _resolve_char_detection()

# -------
# Pythons
# -------
Expand Down
7 changes: 6 additions & 1 deletion src/requests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,7 +789,12 @@ def next(self):
@property
def apparent_encoding(self):
"""The apparent encoding, provided by the charset_normalizer or chardet libraries."""
return chardet.detect(self.content)["encoding"]
if chardet is not None:
return chardet.detect(self.content)["encoding"]
else:
# No character detection library is available, so report "utf-8"
# as the encoding name instead of a detected one.
return "utf-8"

def iter_content(self, chunk_size=1, decode_unicode=False):
"""Iterates over the response data. When stream=True is set on the
Expand Down
25 changes: 9 additions & 16 deletions src/requests/packages.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
import sys

try:
import chardet
except ImportError:
import warnings

import charset_normalizer as chardet

warnings.filterwarnings("ignore", "Trying to detect", module="charset_normalizer")
from .compat import chardet

# This code exists for backwards compatibility reasons.
# I don't like it either. Just look the other way. :)
Expand All @@ -20,11 +13,11 @@
if mod == package or mod.startswith(f"{package}."):
sys.modules[f"requests.packages.{mod}"] = sys.modules[mod]

target = chardet.__name__
for mod in list(sys.modules):
if mod == target or mod.startswith(f"{target}."):
imported_mod = sys.modules[mod]
sys.modules[f"requests.packages.{mod}"] = imported_mod
mod = mod.replace(target, "chardet")
sys.modules[f"requests.packages.{mod}"] = imported_mod
# Kinda cool, though, right?
if chardet is not None:
target = chardet.__name__
for mod in list(sys.modules):
if mod == target or mod.startswith(f"{target}."):
imported_mod = sys.modules[mod]
sys.modules[f"requests.packages.{mod}"] = imported_mod
mod = mod.replace(target, "chardet")
sys.modules[f"requests.packages.{mod}"] = imported_mod
Loading