From c36513f1be2ef3d3cec864accbffda1afaa06ffd Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 21 May 2024 09:44:41 +1200 Subject: [PATCH 001/145] [rh:requests] Update to `requests` 2.32.0 (#9980) Authored by: coletdjnz --- pyproject.toml | 2 +- yt_dlp/networking/_requests.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5fadd14495ae..74d7ff323fe1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "certifi", "mutagen", "pycryptodomex", - "requests>=2.31.0,<3", + "requests>=2.32.0,<3", "urllib3>=1.26.17,<3", "websockets>=12.0", ] diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index e3edc77f3803..75eee8824675 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -21,8 +21,8 @@ if urllib3_version < (1, 26, 17): raise ImportError('Only urllib3 >= 1.26.17 is supported') -if requests.__build__ < 0x023100: - raise ImportError('Only requests >= 2.31.0 is supported') +if requests.__build__ < 0x023200: + raise ImportError('Only requests >= 2.32.0 is supported') import requests.adapters import requests.utils @@ -181,9 +181,13 @@ def proxy_manager_for(self, proxy, **proxy_kwargs): return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs) def cert_verify(*args, **kwargs): - # lean on SSLContext for cert verification + # Lean on our SSLContext for cert verification pass + def _get_connection(self, request, *_, proxies=None, **__): + # Lean on our SSLContext for cert verification + return self.get_connection(request.url, proxies) + class RequestsSession(requests.sessions.Session): """ From 6e36d17f404556f0e3a43f441c477a71a91877d9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 20 May 2024 18:01:17 -0500 Subject: [PATCH 002/145] [build] Exclude `requests` from `py2exe` (#9982) Authored by: bashonly --- README.md | 2 +- bundle/py2exe.py | 6 
+++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index cdd57b024c10..ad98af7c459b 100644 --- a/README.md +++ b/README.md @@ -263,7 +263,7 @@ You can also run `make yt-dlp` instead to compile only the binary without updati ### Standalone Py2Exe Builds (Windows) -While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run. +While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run. If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: diff --git a/bundle/py2exe.py b/bundle/py2exe.py index 2811674925b0..403de0024116 100755 --- a/bundle/py2exe.py +++ b/bundle/py2exe.py @@ -42,9 +42,9 @@ def main(): # py2exe cannot import Crypto 'Crypto', 'Cryptodome', - # py2exe appears to confuse this with our socks library. - # We don't use pysocks and urllib3.contrib.socks would fail to import if tried. 
- 'urllib3.contrib.socks' + # py2exe builds fail to run with requests >=2.32.0 + 'requests', + 'urllib3' ], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], # Modules that are only imported dynamically must be added here From 3584b8390bd21c0393a3079eeee71aed56a1c1d8 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 20 May 2024 18:09:28 -0500 Subject: [PATCH 003/145] [ie/tiktok] Add `device_id` extractor-arg (#9951) Authored by: bashonly --- README.md | 1 + yt_dlp/extractor/tiktok.py | 31 +++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ad98af7c459b..1029d1a6d9c0 100644 --- a/README.md +++ b/README.md @@ -1815,6 +1815,7 @@ The following extractors use this feature: * `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020` * `aid`: Default app ID to use with mobile API calls, e.g. `1180` * `app_info`: Enable mobile API extraction with one or more app info strings in the format of `/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001` +* `device_id`: Enable mobile API extraction with a genuine device ID to be used with mobile API calls. 
Default is a random 19-digit string #### rokfinchannel * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 2fb41ba79469..6d0d7eea3487 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1,8 +1,8 @@ +import functools import itertools import json import random import re -import string import time import uuid @@ -15,6 +15,7 @@ UnsupportedError, UserNotLive, determine_ext, + filter_dict, format_field, int_or_none, join_nonempty, @@ -49,11 +50,21 @@ class TikTokBaseIE(InfoExtractor): _APP_INFO = None _APP_USER_AGENT = None - @property + @functools.cached_property def _KNOWN_APP_INFO(self): - return self._configuration_arg('app_info', ie_key=TikTokIE) + # If we have a genuine device ID, we may not need any IID + default = [''] if self._KNOWN_DEVICE_ID else [] + return self._configuration_arg('app_info', default, ie_key=TikTokIE) - @property + @functools.cached_property + def _KNOWN_DEVICE_ID(self): + return self._configuration_arg('device_id', [None], ie_key=TikTokIE)[0] + + @functools.cached_property + def _DEVICE_ID(self): + return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000)) + + @functools.cached_property def _API_HOSTNAME(self): return self._configuration_arg( 'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0] @@ -115,7 +126,7 @@ def _call_api_impl(self, ep, query, video_id, fatal=True, }, query=query) def _build_api_query(self, query): - return { + return filter_dict({ **query, 'device_platform': 'android', 'os': 'android', @@ -156,10 +167,10 @@ def _build_api_query(self, query): 'build_number': self._APP_INFO['app_version'], 'region': 'US', 'ts': int(time.time()), - 'iid': self._APP_INFO['iid'], - 'device_id': random.randint(7250000000000000000, 7351147085025500000), + 'iid': self._APP_INFO.get('iid'), + 'device_id': self._DEVICE_ID, 'openudid': 
''.join(random.choices('0123456789abcdef', k=16)), - } + }) def _call_api(self, ep, query, video_id, fatal=True, note='Downloading API JSON', errnote='Unable to download API page'): @@ -848,7 +859,7 @@ def _video_entries_api(self, webpage, user_id, username): 'max_cursor': 0, 'min_cursor': 0, 'retry_type': 'no_retry', - 'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + 'device_id': self._DEVICE_ID, # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. } for page in itertools.count(1): @@ -896,7 +907,7 @@ def _entries(self, list_id, display_id): 'cursor': 0, 'count': 20, 'type': 5, - 'device_id': ''.join(random.choices(string.digits, k=19)) + 'device_id': self._DEVICE_ID, } for page in itertools.count(1): From 4ccd73fea0f6f4be343e1ec7f22dd03799addcf8 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 20 May 2024 18:11:24 -0500 Subject: [PATCH 004/145] [ie/tiktok] Extract all web formats (#9960) Closes #9506 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 122 ++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 6d0d7eea3487..c96fa5038868 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -20,6 +20,8 @@ int_or_none, join_nonempty, merge_dicts, + mimetype2ext, + parse_qs, qualities, remove_start, srt_subtitles_timecode, @@ -250,23 +252,22 @@ def _get_subtitles(self, aweme_detail, aweme_id): }) return subtitles + def _parse_url_key(self, url_key): + format_id, codec, res, bitrate = self._search_regex( + r'v[^_]+_(?P(?P[^_]+)_(?P\d+p)_(?P\d+))', url_key, + 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate')) + if not format_id: + return {}, None + return { + 'format_id': format_id, + 'vcodec': 'h265' if codec == 'bytevc1' 
else codec, + 'tbr': int_or_none(bitrate, scale=1000) or None, + 'quality': qualities(self.QUALITIES)(res), + }, res + def _parse_aweme_video_app(self, aweme_detail): aweme_id = aweme_detail['aweme_id'] video_info = aweme_detail['video'] - - def parse_url_key(url_key): - format_id, codec, res, bitrate = self._search_regex( - r'v[^_]+_(?P(?P[^_]+)_(?P\d+p)_(?P\d+))', url_key, - 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate')) - if not format_id: - return {}, None - return { - 'format_id': format_id, - 'vcodec': 'h265' if codec == 'bytevc1' else codec, - 'tbr': int_or_none(bitrate, scale=1000) or None, - 'quality': qualities(self.QUALITIES)(res), - }, res - known_resolutions = {} def audio_meta(url): @@ -281,7 +282,7 @@ def audio_meta(url): } if ext == 'mp3' or '-music-' in url else {} def extract_addr(addr, add_meta={}): - parsed_meta, res = parse_url_key(addr.get('url_key', '')) + parsed_meta, res = self._parse_url_key(addr.get('url_key', '')) is_bytevc2 = parsed_meta.get('vcodec') == 'bytevc2' if res: known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height'))) @@ -295,7 +296,7 @@ def extract_addr(addr, add_meta={}): 'acodec': 'aac', 'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked **add_meta, **parsed_meta, - # bytevc2 is bytedance's proprietary (unplayable) video codec + # bytevc2 is bytedance's own custom h266/vvc codec, as-of-yet unplayable 'preference': -100 if is_bytevc2 else -1, 'format_note': join_nonempty( add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, @@ -307,6 +308,7 @@ def extract_addr(addr, add_meta={}): formats = [] width = int_or_none(video_info.get('width')) height = int_or_none(video_info.get('height')) + ratio = try_call(lambda: width / height) or 0.5625 if video_info.get('play_addr'): formats.extend(extract_addr(video_info['play_addr'], { 'format_id': 'play_addr', @@ -323,8 +325,8 @@ def extract_addr(addr, 
add_meta={}): 'format_id': 'download_addr', 'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''), 'vcodec': 'h264', - 'width': dl_width or width, - 'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong + 'width': dl_width, + 'height': try_call(lambda: int(dl_width / ratio)), # download_addr['height'] is wrong 'preference': -2 if video_info.get('has_watermark') else -1, })) if video_info.get('play_addr_h264'): @@ -431,26 +433,88 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): formats = [] width = int_or_none(video_info.get('width')) height = int_or_none(video_info.get('height')) + ratio = try_call(lambda: width / height) or 0.5625 + COMMON_FORMAT_INFO = { + 'ext': 'mp4', + 'vcodec': 'h264', + 'acodec': 'aac', + } + + for bitrate_info in traverse_obj(video_info, ('bitrateInfo', lambda _, v: v['PlayAddr']['UrlList'])): + format_info, res = self._parse_url_key( + traverse_obj(bitrate_info, ('PlayAddr', 'UrlKey', {str})) or '') + # bytevc2 is bytedance's own custom h266/vvc codec, as-of-yet unplayable + is_bytevc2 = format_info.get('vcodec') == 'bytevc2' + format_info.update({ + 'format_note': 'UNPLAYABLE' if is_bytevc2 else None, + 'preference': -100 if is_bytevc2 else -1, + 'filesize': traverse_obj(bitrate_info, ('PlayAddr', 'DataSize', {int_or_none})), + }) + + if dimension := (res and int(res[:-1])): + if dimension == 540: # '540p' is actually 576p + dimension = 576 + if ratio < 1: # portrait: res/dimension is width + y = int(dimension / ratio) + format_info.update({ + 'width': dimension, + 'height': y - (y % 2), + }) + else: # landscape: res/dimension is height + x = int(dimension * ratio) + format_info.update({ + 'width': x - (x % 2), + 'height': dimension, + }) + + for video_url in traverse_obj(bitrate_info, ('PlayAddr', 'UrlList', ..., {url_or_none})): + formats.append({ + **COMMON_FORMAT_INFO, + **format_info, + 'url': 
self._proto_relative_url(video_url), + }) + + # We don't have res string for play formats, but need quality for sorting & de-duplication + play_quality = traverse_obj(formats, (lambda _, v: v['width'] == width, 'quality', any)) for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})): formats.append({ + **COMMON_FORMAT_INFO, + 'format_id': 'play', 'url': self._proto_relative_url(play_url), - 'ext': 'mp4', 'width': width, 'height': height, + 'quality': play_quality, }) for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})): formats.append({ + **COMMON_FORMAT_INFO, 'format_id': 'download', 'url': self._proto_relative_url(download_url), - 'ext': 'mp4', - 'width': width, - 'height': height, }) self._remove_duplicate_formats(formats) + for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']): + f.update({ + 'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '), + 'preference': f.get('preference') or -2, + }) + + # Is it a slideshow with only audio for download? 
+ if not formats and traverse_obj(music_info, ('playUrl', {url_or_none})): + audio_url = music_info['playUrl'] + ext = traverse_obj(parse_qs(audio_url), ( + 'mime_type', -1, {lambda x: x.replace('_', '/')}, {mimetype2ext})) or 'm4a' + formats.append({ + 'format_id': 'audio', + 'url': self._proto_relative_url(audio_url), + 'ext': ext, + 'acodec': 'aac' if ext == 'm4a' else ext, + 'vcodec': 'none', + }) + thumbnails = [] for thumb_url in traverse_obj(aweme_detail, ( (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})): @@ -462,10 +526,17 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): return { 'id': video_id, + **traverse_obj(music_info, { + 'track': ('title', {str}), + 'album': ('album', {str}, {lambda x: x or None}), + 'artists': ('authorName', {str}, {lambda x: [x] if x else None}), + 'duration': ('duration', {int_or_none}), + }), **traverse_obj(aweme_detail, { 'title': ('desc', {str}), 'description': ('desc', {str}), - 'duration': ('video', 'duration', {int_or_none}), + # audio-only slideshows have a video duration of 0 and an actual audio duration + 'duration': ('video', 'duration', {int_or_none}, {lambda x: x or None}), 'timestamp': ('createTime', {int_or_none}), }), **traverse_obj(author_info or aweme_detail, { @@ -480,11 +551,6 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): 'repost_count': 'shareCount', 'comment_count': 'commentCount', }, expected_type=int_or_none), - **traverse_obj(music_info, { - 'track': ('title', {str}), - 'album': ('album', {str}, {lambda x: x or None}), - 'artists': ('authorName', {str}, {lambda x: [x] if x else None}), - }), 'channel_id': channel_id, 'uploader_url': user_url, 'formats': formats, From 3f7999533ebe41c2a579d91b4e4cb211cfcd3bc0 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Wed, 22 May 2024 16:22:25 +0200 Subject: [PATCH 005/145] [rh:requests] Patch support for `requests` 2.32.2+ (#9992) Authored by: Grub4K --- 
.github/workflows/build.yml | 14 +++++++++++--- README.md | 2 +- bundle/py2exe.py | 6 +++--- pyproject.toml | 7 +++++-- yt_dlp/networking/_requests.py | 20 ++++++++++++++++---- 5 files changed, 36 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d9352fedd871..55cf3b3a271c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -360,7 +360,7 @@ jobs: - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build - python devscripts/install_deps.py --include py2exe --include curl-cffi + python devscripts/install_deps.py --include curl-cffi python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" - name: Prepare @@ -369,12 +369,20 @@ jobs: python devscripts/make_lazy_extractors.py - name: Build run: | - python -m bundle.py2exe - Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe python -m bundle.pyinstaller python -m bundle.pyinstaller --onedir + Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip + - name: Install Requirements (py2exe) + run: | + python devscripts/install_deps.py --include py2exe + - name: Build (py2exe) + run: | + python -m bundle.py2exe + Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe + Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe + - name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION run: | diff --git a/README.md b/README.md index 1029d1a6d9c0..2c909976ace7 100644 --- a/README.md +++ b/README.md @@ -263,7 +263,7 @@ You can also run `make yt-dlp` instead to compile only the binary without updati ### Standalone Py2Exe Builds (Windows) -While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds 
**cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run. +While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run. If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: diff --git a/bundle/py2exe.py b/bundle/py2exe.py index 403de0024116..2811674925b0 100755 --- a/bundle/py2exe.py +++ b/bundle/py2exe.py @@ -42,9 +42,9 @@ def main(): # py2exe cannot import Crypto 'Crypto', 'Cryptodome', - # py2exe builds fail to run with requests >=2.32.0 - 'requests', - 'urllib3' + # py2exe appears to confuse this with our socks library. + # We don't use pysocks and urllib3.contrib.socks would fail to import if tried. + 'urllib3.contrib.socks' ], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], # Modules that are only imported dynamically must be added here diff --git a/pyproject.toml b/pyproject.toml index 74d7ff323fe1..b9a36ba6d70b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "certifi", "mutagen", "pycryptodomex", - "requests>=2.32.0,<3", + "requests>=2.31.0,<3", "urllib3>=1.26.17,<3", "websockets>=12.0", ] @@ -73,7 +73,10 @@ pyinstaller = [ "pyinstaller>=6.3; sys_platform!='darwin'", "pyinstaller==5.13.2; sys_platform=='darwin'", # needed for curl_cffi ] -py2exe = ["py2exe>=0.12"] +py2exe = [ + "py2exe>=0.12", + "requests==2.31.*", +] [project.urls] Documentation = "https://github.com/yt-dlp/yt-dlp#readme" diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 75eee8824675..6397a2c0ca92 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -21,13 +21,14 @@ if urllib3_version < (1, 26, 17): raise ImportError('Only urllib3 >= 1.26.17 is supported') -if 
requests.__build__ < 0x023200: - raise ImportError('Only requests >= 2.32.0 is supported') +if requests.__build__ < 0x023100: + raise ImportError('Only requests >= 2.31.0 is supported') import requests.adapters import requests.utils import urllib3.connection import urllib3.exceptions +import urllib3.util from ._helper import ( InstanceStoreMixin, @@ -180,14 +181,25 @@ def proxy_manager_for(self, proxy, **proxy_kwargs): extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs) + # Skip `requests` internal verification; we use our own SSLContext + # requests 2.31.0+ def cert_verify(*args, **kwargs): - # Lean on our SSLContext for cert verification pass + # requests 2.31.0-2.32.1 def _get_connection(self, request, *_, proxies=None, **__): - # Lean on our SSLContext for cert verification return self.get_connection(request.url, proxies) + # requests 2.32.2+: Reimplementation without `_urllib3_request_context` + def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None): + url = urllib3.util.parse_url(request.url).url + + manager = self.poolmanager + if proxy := select_proxy(url, proxies): + manager = self.proxy_manager_for(proxy) + + return manager.connection_from_url(url) + class RequestsSession(requests.sessions.Session): """ From 78c57cc0e0998b8ed90e4306f410aa4be4115cd7 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 22 May 2024 09:30:25 -0500 Subject: [PATCH 006/145] [build] `macos` job requires `setuptools<70` (#9993) Authored by: bashonly --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b9a36ba6d70b..8e3bce4bfc48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ build = [ "build", "hatchling", "pip", + "setuptools>=66.1.0,<70", "wheel", ] dev = [ From eef1e9f44ff14c5e65b759bb1eafa3946cdaf719 Mon Sep 17 00:00:00 2001 From: 
bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 22 May 2024 17:17:10 -0500 Subject: [PATCH 007/145] [ie/tiktok] Fix subtitles extraction (#9961) Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 56 ++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index c96fa5038868..7772dd1f281f 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -212,7 +212,31 @@ def _extract_aweme_app(self, aweme_id): raise ExtractorError('Unable to find video in feed', video_id=aweme_id) return self._parse_aweme_video_app(aweme_detail) - def _get_subtitles(self, aweme_detail, aweme_id): + def _extract_web_data_and_status(self, url, video_id, fatal=True): + webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=fatal) or '' + video_data, status = {}, None + + if universal_data := self._get_universal_data(webpage, video_id): + self.write_debug('Found universal data for rehydration') + status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0 + video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict})) + + elif sigi_data := self._get_sigi_state(webpage, video_id): + self.write_debug('Found sigi state data') + status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0 + video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict})) + + elif next_data := self._search_nextjs_data(webpage, video_id, default={}): + self.write_debug('Found next.js data') + status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0 + video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict})) + + elif fatal: + raise ExtractorError('Unable to extract webpage video data') + + return video_data, status + + def _get_subtitles(self, aweme_detail, aweme_id, user_url): # TODO: Extract 
text positioning info subtitles = {} # aweme/detail endpoint subs @@ -243,9 +267,10 @@ def _get_subtitles(self, aweme_detail, aweme_id): }) # webpage subs if not subtitles: - for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', ...), expected_type=dict): - if not caption.get('Url'): - continue + if user_url: # only _parse_aweme_video_app needs to extract the webpage here + aweme_detail, _ = self._extract_web_data_and_status( + f'{user_url}/video/{aweme_id}', aweme_id, fatal=False) + for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])): subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({ 'ext': remove_start(caption.get('Format'), 'web'), 'url': caption['Url'], @@ -412,7 +437,7 @@ def extract_addr(addr, add_meta={}): 'album': str_or_none(music_info.get('album')) or None, 'artists': re.split(r'(?:, | & )', music_author) if music_author else None, 'formats': formats, - 'subtitles': self.extract_subtitles(aweme_detail, aweme_id), + 'subtitles': self.extract_subtitles(aweme_detail, aweme_id, user_url), 'thumbnails': thumbnails, 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000), 'availability': self._availability( @@ -554,6 +579,7 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): 'channel_id': channel_id, 'uploader_url': user_url, 'formats': formats, + 'subtitles': self.extract_subtitles(aweme_detail, video_id, None), 'thumbnails': thumbnails, 'http_headers': { 'Referer': webpage_url, @@ -839,25 +865,7 @@ def _real_extract(self, url): self.report_warning(f'{e}; trying with webpage') url = self._create_url(user_id, video_id) - webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}) - - if universal_data := self._get_universal_data(webpage, video_id): - self.write_debug('Found universal data for rehydration') - status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', 
{int})) or 0 - video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict})) - - elif sigi_data := self._get_sigi_state(webpage, video_id): - self.write_debug('Found sigi state data') - status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0 - video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict})) - - elif next_data := self._search_nextjs_data(webpage, video_id, default={}): - self.write_debug('Found next.js data') - status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0 - video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict})) - - else: - raise ExtractorError('Unable to extract webpage video data') + video_data, status = self._extract_web_data_and_status(url, video_id) if video_data and status == 0: return self._parse_aweme_video_web(video_data, url, video_id) From beaf832c7a9d57833f365ce18f6115b88071b296 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 22 May 2024 17:20:29 -0500 Subject: [PATCH 008/145] [ie/soundcloud] Add `formats` extractor-arg (#10004) Authored by: bashonly --- README.md | 3 ++ yt_dlp/extractor/soundcloud.py | 58 +++++++++++++++++++++++----------- 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 2c909976ace7..887cfde2319a 100644 --- a/README.md +++ b/README.md @@ -1841,6 +1841,9 @@ The following extractors use this feature: #### afreecatvlive * `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web` +#### soundcloud +* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can passed by itself to request all formats. 
Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3` + **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index c9ca41a5cdc9..358146171f12 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -1,3 +1,4 @@ +import functools import itertools import json import re @@ -12,6 +13,7 @@ error_to_compat_str, float_or_none, int_or_none, + join_nonempty, mimetype2ext, parse_qs, str_or_none, @@ -68,6 +70,16 @@ class SoundcloudBaseIE(InfoExtractor): 'original': 0, } + _DEFAULT_FORMATS = ['http_aac', 'hls_aac', 'http_opus', 'hls_opus', 'http_mp3', 'hls_mp3'] + + @functools.cached_property + def _is_requested(self): + return re.compile(r'|'.join(set( + re.escape(pattern).replace(r'\*', r'.*') if pattern != 'default' + else '|'.join(map(re.escape, self._DEFAULT_FORMATS)) + for pattern in self._configuration_arg('formats', ['default'], ie_key=SoundcloudIE) + ))).fullmatch + def _store_client_id(self, client_id): self.cache.store('soundcloud', 'client_id', client_id) @@ -216,7 +228,7 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') if redirect_url: urlh = self._request_webpage( - HEADRequest(redirect_url), track_id, fatal=False) + HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False) if urlh: format_url = urlh.url format_urls.add(format_url) @@ -258,7 +270,7 @@ def add_format(f, protocol, is_preview=False): abr = f.get('abr') if abr: f['abr'] = int(abr) - if protocol == 'hls': + if protocol in ('hls', 'hls-aes'): protocol = 'm3u8' if ext == 'aac' else 'm3u8_native' else: protocol = 'http' @@ 
-274,11 +286,32 @@ def add_format(f, protocol, is_preview=False): if extract_flat: break format_url = t['url'] - stream = None + protocol = traverse_obj(t, ('format', 'protocol', {str})) + if protocol == 'progressive': + protocol = 'http' + if protocol != 'hls' and '/hls' in format_url: + protocol = 'hls' + if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url: + protocol = 'hls-aes' + + ext = None + if preset := traverse_obj(t, ('preset', {str_or_none})): + ext = preset.split('_')[0] + if ext not in KNOWN_EXTENSIONS: + ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str}))) + + identifier = join_nonempty(protocol, ext, delim='_') + if not self._is_requested(identifier): + self.write_debug(f'"{identifier}" is not a requested format, skipping') + continue + + stream = None for retry in self.RetryManager(fatal=False): try: - stream = self._download_json(format_url, track_id, query=query, headers=self._HEADERS) + stream = self._download_json( + format_url, track_id, f'Downloading {identifier} format info JSON', + query=query, headers=self._HEADERS) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 429: self.report_warning( @@ -289,27 +322,14 @@ def add_format(f, protocol, is_preview=False): else: self.report_warning(e.msg) - if not isinstance(stream, dict): - continue - stream_url = url_or_none(stream.get('url')) + stream_url = traverse_obj(stream, ('url', {url_or_none})) if invalid_url(stream_url): continue format_urls.add(stream_url) - stream_format = t.get('format') or {} - protocol = stream_format.get('protocol') - if protocol != 'hls' and '/hls' in format_url: - protocol = 'hls' - ext = None - preset = str_or_none(t.get('preset')) - if preset: - ext = preset.split('_')[0] - if ext not in KNOWN_EXTENSIONS: - ext = mimetype2ext(stream_format.get('mime_type')) add_format({ 'url': stream_url, 'ext': ext, - }, 'http' if protocol == 'progressive' else protocol, - t.get('snipped') or '/preview/' in format_url) + 
}, protocol, t.get('snipped') or '/preview/' in format_url) for f in formats: f['vcodec'] = 'none' From f2816634e3be88fe158b342ee33918de3c272a54 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 22 May 2024 17:25:07 -0500 Subject: [PATCH 009/145] [ie/crunchyroll] Fix stream extraction (#10005) Closes #9994 Authored by: bashonly --- yt_dlp/extractor/crunchyroll.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index 90967c1607ce..ea54f019511c 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -2,6 +2,7 @@ import uuid from .common import InfoExtractor +from ..networking import Request from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -24,6 +25,7 @@ class CrunchyrollBaseIE(InfoExtractor): _BASE_URL = 'https://www.crunchyroll.com' _API_BASE = 'https://api.crunchyroll.com' _NETRC_MACHINE = 'crunchyroll' + _SWITCH_USER_AGENT = 'Crunchyroll/1.8.0 Nintendo Switch/12.3.12.0 UE4/4.27' _REFRESH_TOKEN = None _AUTH_HEADERS = None _AUTH_EXPIRY = None @@ -179,10 +181,19 @@ def _extract_stream(self, identifier, display_id=None): display_id = identifier self._update_auth() - stream_response = self._download_json( - f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play', - display_id, note='Downloading stream info', errnote='Failed to download stream info', - headers=CrunchyrollBaseIE._AUTH_HEADERS) + headers = {**CrunchyrollBaseIE._AUTH_HEADERS, 'User-Agent': self._SWITCH_USER_AGENT} + try: + stream_response = self._download_json( + f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play', + display_id, note='Downloading stream info', errnote='Failed to download stream info', headers=headers) + except ExtractorError as error: + if self.get_param('ignore_no_formats_error'): + 
self.report_warning(error.orig_msg) + return [], {} + elif isinstance(error.cause, HTTPError) and error.cause.status == 420: + raise ExtractorError( + 'You have reached the rate-limit for active streams; try again later', expected=True) + raise available_formats = {'': ('', '', stream_response['url'])} for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])): @@ -211,7 +222,7 @@ def _extract_stream(self, identifier, display_id=None): fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest') self._merge_subtitles(dash_subs, target=subtitles) else: - continue # XXX: Update this if/when meta mpd formats are working + continue # XXX: Update this if meta mpd formats work; will be tricky with token invalidation for f in adaptive_formats: if f.get('acodec') != 'none': f['language'] = audio_locale @@ -221,6 +232,15 @@ def _extract_stream(self, identifier, display_id=None): for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)): subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})) + # Invalidate stream token to avoid rate-limit + error_msg = 'Unable to invalidate stream token; you may experience rate-limiting' + if stream_token := stream_response.get('token'): + self._request_webpage(Request( + f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive', + headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False) + else: + self.report_warning(error_msg) + return formats, subtitles From 7b5674949fd03a33b47b67b31d56a5adf1c48c91 Mon Sep 17 00:00:00 2001 From: vtexier Date: Thu, 23 May 2024 01:09:58 +0200 Subject: [PATCH 010/145] [ie/ArteTV] Label forced subtitles (#9945) Authored by: vtexier --- yt_dlp/extractor/arte.py | 64 +++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git 
a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 1c180b1fd5b6..46fe006cc93b 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -5,6 +5,7 @@ ExtractorError, GeoRestrictedError, int_or_none, + join_nonempty, parse_iso8601, parse_qs, strip_or_none, @@ -31,20 +32,6 @@ class ArteTVIE(ArteTVBaseIE): _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', 'only_matching': True, - }, { - 'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/', - 'info_dict': { - 'id': '100103-000-A', - 'title': 'USA: Dyskryminacja na porodówce', - 'description': 'md5:242017b7cce59ffae340a54baefcafb1', - 'alt_title': 'ARTE Reportage', - 'upload_date': '20201103', - 'duration': 554, - 'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530', - 'timestamp': 1604417980, - 'ext': 'mp4', - }, - 'params': {'skip_download': 'm3u8'} }, { 'note': 'No alt_title', 'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/', @@ -58,6 +45,23 @@ class ArteTVIE(ArteTVBaseIE): }, { 'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/', 'only_matching': True, + }, { + 'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/', + 'info_dict': { + 'id': '109067-000-A', + 'ext': 'mp4', + 'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739', + 'timestamp': 1713927600, + 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530', + 'duration': 7599, + 'title': 'La loi de Téhéran', + 'upload_date': '20240424', + 'subtitles': { + 'fr': 'mincount:1', + 'fr-acc': 'mincount:1', + 'fr-forced': 'mincount:1', + }, + }, }, { 'note': 'age-restricted', 'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/', @@ -71,23 +75,7 @@ class ArteTVIE(ArteTVBaseIE): 'upload_date': '20230930', 'ext': 'mp4', }, - }, { - 'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/', - 'info_dict': { - 'id': '085374-003-A', - 'ext': 
'mp4', - 'description': 'md5:ab79ec7cc472a93164415b4e4916abf9', - 'timestamp': 1702872000, - 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530', - 'duration': 2594, - 'title': 'Die kurze Zeit der Jugend', - 'alt_title': 'Im hohen Norden geboren', - 'upload_date': '20231218', - 'subtitles': { - 'fr': 'mincount:1', - 'fr-acc': 'mincount:1', - }, - }, + 'skip': '404 Not Found', }] _GEO_BYPASS = True @@ -143,16 +131,18 @@ def _fix_accessible_subs_locale(subs): updated_subs = {} for lang, sub_formats in subs.items(): for fmt in sub_formats: - if fmt.get('url', '').endswith('-MAL.m3u8'): - lang += '-acc' - updated_subs.setdefault(lang, []).append(fmt) + url = fmt.get('url') or '' + suffix = ('acc' if url.endswith('-MAL.m3u8') + else 'forced' if '_VO' not in url + else None) + updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt) return updated_subs def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') lang = mobj.group('lang') or mobj.group('lang_2') - langauge_code = self._LANG_MAP.get(lang) + language_code = self._LANG_MAP.get(lang) config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={ 'x-validated-age': '18' @@ -180,10 +170,10 @@ def _real_extract(self, url): m = self._VERSION_CODE_RE.match(stream_version_code) if m: lang_pref = int(''.join('01'[x] for x in ( - m.group('vlang') == langauge_code, # we prefer voice in the requested language + m.group('vlang') == language_code, # we prefer voice in the requested language not m.group('audio_desc'), # and not the audio description version bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice - m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language + m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language not m.group('has_sub'), # but we 
prefer no subtitles otherwise not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles ))) From 296df0da1d38a44d34c99b60a18066c301774537 Mon Sep 17 00:00:00 2001 From: panatexxa <91012623+panatexxa@users.noreply.github.com> Date: Thu, 23 May 2024 06:03:55 +0200 Subject: [PATCH 011/145] [ie/Moviepilot] Fix extractor (#9366) Authored by: panatexxa --- yt_dlp/extractor/moviepilot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py index 668c0984eb8f..35c57bc70334 100644 --- a/yt_dlp/extractor/moviepilot.py +++ b/yt_dlp/extractor/moviepilot.py @@ -14,7 +14,7 @@ class MoviepilotIE(InfoExtractor): 'display_id': 'interstellar-2', 'ext': 'mp4', 'title': 'Interstellar', - 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1ZganMw4HVXg/x1080', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1.*/x1080', 'timestamp': 1605010596, 'description': 'md5:0ae9cb452af52610c9ffc60f2fd0474c', 'uploader': 'Moviepilot', @@ -71,7 +71,7 @@ class MoviepilotIE(InfoExtractor): 'age_limit': 0, 'duration': 82, 'upload_date': '20201109', - 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Zg3lxLv9j5u/x1080', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Z.*/x1080', 'uploader': 'Moviepilot', 'like_count': int, 'view_count': int, @@ -92,6 +92,6 @@ def _real_extract(self, url): 'ie_key': DailymotionIE.ie_key(), 'display_id': video_id, 'title': clip.get('title'), - 'url': f'https://www.dailymotion.com/video/{clip["videoRemoteId"]}', + 'url': f'https://www.dailymotion.com/video/{clip["video"]["remoteId"]}', 'description': clip.get('summary'), } From 06cb0638392b607b47d3c2ac48eb2ebecb0f060d Mon Sep 17 00:00:00 2001 From: "Amir Y. 
Perehodnik" Date: Thu, 23 May 2024 07:07:20 +0300 Subject: [PATCH 012/145] [ie/Instagram] Support `/reels/` URLs (#9539) Closes #6689 Authored by: amir16yp --- yt_dlp/extractor/instagram.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index f7f21505ea9e..46f9cd681b15 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -255,7 +255,7 @@ def _real_extract(self, url): class InstagramIE(InstagramBaseIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reel)/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P[^/?#&]+))' _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1'] _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', @@ -379,6 +379,9 @@ class InstagramIE(InstagramBaseIE): }, { 'url': 'https://www.instagram.com/marvelskies.fc/reel/CWqAgUZgCku/', 'only_matching': True, + }, { + 'url': 'https://www.instagram.com/reels/Cop84x6u7CP/', + 'only_matching': True, }] @classmethod From 65e709d23530959075816e966c42179ad46e8e3b Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Thu, 23 May 2024 12:09:21 +0800 Subject: [PATCH 013/145] [ie/GodResource] Add extractor (#9629) Closes #9551 Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/godresource.py | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 yt_dlp/extractor/godresource.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index cf408b68288e..91a876b22da7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -715,6 +715,7 @@ from .gmanetwork import GMANetworkVideoIE from .go import GoIE from .godtube import GodTubeIE +from .godresource import GodResourceIE from .gofile import GofileIE 
from .golem import GolemIE from .goodgame import GoodGameIE diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py new file mode 100644 index 000000000000..f010fff36083 --- /dev/null +++ b/yt_dlp/extractor/godresource.py @@ -0,0 +1,79 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + determine_ext, + str_or_none, + unified_timestamp, + url_or_none +) +from ..utils.traversal import traverse_obj + + +class GodResourceIE(InfoExtractor): + _VALID_URL = r'https?://new\.godresource\.com/video/(?P\w+)' + _TESTS = [{ + # hls stream + 'url': 'https://new.godresource.com/video/A01mTKjyf6w', + 'info_dict': { + 'id': 'A01mTKjyf6w', + 'ext': 'mp4', + 'view_count': int, + 'timestamp': 1710978666, + 'channel_id': '5', + 'thumbnail': 'https://cdn-02.godresource.com/e42968ac-9e8b-4231-ab86-f4f9d775841f/thumbnail.jpg', + 'channel': 'Stedfast Baptist Church', + 'upload_date': '20240320', + 'title': 'GodResource video #A01mTKjyf6w', + } + }, { + # mp4 link + 'url': 'https://new.godresource.com/video/01DXmBbQv_X', + 'md5': '0e8f72aa89a106b9d5c011ba6f8717b7', + 'info_dict': { + 'id': '01DXmBbQv_X', + 'ext': 'mp4', + 'channel_id': '12', + 'view_count': int, + 'timestamp': 1687996800, + 'thumbnail': 'https://cdn-02.godresource.com/sodomitedeception/thumbnail.jpg', + 'channel': 'Documentaries', + 'title': 'The Sodomite Deception', + 'upload_date': '20230629', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + api_data = self._download_json( + f'https://api.godresource.com/api/Streams/{display_id}', display_id) + + video_url = api_data['streamUrl'] + is_live = api_data.get('isLive') or False + if (ext := determine_ext(video_url)) == 'm3u8': + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + video_url, display_id, live=is_live) + elif ext == 'mp4': + formats, subtitles = [{ + 'url': video_url, + 'ext': ext + }], {} + else: + raise ExtractorError(f'Unexpected video format {ext}') + + return { 
+ 'id': display_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': '', + 'is_live': is_live, + **traverse_obj(api_data, { + 'title': ('title', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + 'view_count': ('views', {int}), + 'channel': ('channelName', {str}), + 'channel_id': ('channelId', {str_or_none}), + 'timestamp': ('streamDateCreated', {unified_timestamp}), + 'modified_timestamp': ('streamDataModified', {unified_timestamp}) + }) + } From be7db1a5a8c483726c511c30ea4689cbb8b27962 Mon Sep 17 00:00:00 2001 From: six Date: Thu, 23 May 2024 00:13:00 -0400 Subject: [PATCH 014/145] [ie/NTSLive] Add extractor (#9641) Closes #9640 Authored by: lostfictions --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/nts.py | 76 +++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 yt_dlp/extractor/nts.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 91a876b22da7..9dfa28c4bb4b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1333,6 +1333,7 @@ NRKTVSeriesIE, ) from .nrl import NRLTVIE +from .nts import NTSLiveIE from .ntvcojp import NTVCoJpCUIE from .ntvde import NTVDeIE from .ntvru import NTVRuIE diff --git a/yt_dlp/extractor/nts.py b/yt_dlp/extractor/nts.py new file mode 100644 index 000000000000..a801740fa530 --- /dev/null +++ b/yt_dlp/extractor/nts.py @@ -0,0 +1,76 @@ +from .common import InfoExtractor +from ..utils import parse_iso8601, url_or_none +from ..utils.traversal import traverse_obj + + +class NTSLiveIE(InfoExtractor): + IE_NAME = 'nts.live' + _VALID_URL = r'https?://(?:www\.)?nts\.live/shows/[^/?#]+/episodes/(?P[^/?#]+)' + _TESTS = [ + { + # embedded soundcloud + 'url': 'https://www.nts.live/shows/yu-su/episodes/yu-su-2nd-april-2024', + 'md5': 'b5444c04888c869d68758982de1a27d8', + 'info_dict': { + 'id': '1791563518', + 'ext': 'opus', + 'uploader_id': '995579326', + 'title': 'Pender Street Steppers & YU SU', + 'timestamp': 
1712073600, + 'upload_date': '20240402', + 'thumbnail': 'https://i1.sndcdn.com/artworks-qKcNO0z0AQGGbv9s-GljJCw-original.jpg', + 'license': 'all-rights-reserved', + 'repost_count': int, + 'uploader_url': 'https://soundcloud.com/user-643553014', + 'uploader': 'NTS Latest', + 'description': 'md5:cd00ac535a63caaad722483ae3ff802a', + 'duration': 10784.157, + 'genres': ['Deep House', 'House', 'Leftfield Disco', 'Jazz Fusion', 'Dream Pop'], + 'modified_timestamp': 1712564687, + 'modified_date': '20240408', + }, + }, + { + # embedded mixcloud + 'url': 'https://www.nts.live/shows/absolute-fiction/episodes/absolute-fiction-23rd-july-2022', + 'info_dict': { + 'id': 'NTSRadio_absolute-fiction-23rd-july-2022', + 'ext': 'webm', + 'like_count': int, + 'title': 'Absolute Fiction', + 'comment_count': int, + 'uploader_url': 'https://www.mixcloud.com/NTSRadio/', + 'description': 'md5:ba49da971ae8d71ee45813c52c5e2a04', + 'tags': [], + 'duration': 3529, + 'timestamp': 1658588400, + 'repost_count': int, + 'upload_date': '20220723', + 'uploader_id': 'NTSRadio', + 'thumbnail': 'https://thumbnailer.mixcloud.com/unsafe/1024x1024/extaudio/5/1/a/d/ae3e-1be9-4fd4-983e-9c3294226eac', + 'uploader': 'Mixcloud NTS Radio', + 'genres': ['Minimal Synth', 'Post Punk', 'Industrial '], + 'modified_timestamp': 1658842165, + 'modified_date': '20220726', + }, + 'params': {'skip_download': 'm3u8'}, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + data = self._search_json(r'window\._REACT_STATE_\s*=', webpage, 'react state', video_id) + + return { + '_type': 'url_transparent', + **traverse_obj(data, ('episode', { + 'url': ('audio_sources', ..., 'url', {url_or_none}, any), + 'title': ('name', {str}), + 'description': ('description', {str}), + 'genres': ('genres', ..., 'value', {str}), + 'timestamp': ('broadcast', {parse_iso8601}), + 'modified_timestamp': ('updated', {parse_iso8601}), + })), + } From 
0dd53faeca2ba0ce138e4092d07b5f2dbf2422f9 Mon Sep 17 00:00:00 2001 From: TuxCoder Date: Thu, 23 May 2024 06:25:16 +0200 Subject: [PATCH 015/145] [ie/orf:on] Improve extraction (#9677) Closes #9652 Authored by: TuxCoder --- yt_dlp/extractor/orf.py | 42 ++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 526e9acaf376..13561202c6ca 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -14,6 +14,7 @@ make_archive_id, mimetype2ext, orderedSet, + parse_age_limit, remove_end, smuggle_url, strip_jsonp, @@ -569,7 +570,7 @@ def _real_extract(self, url): class ORFONIE(InfoExtractor): IE_NAME = 'orf:on' - _VALID_URL = r'https?://on\.orf\.at/video/(?P\d{8})/(?P[\w-]+)' + _VALID_URL = r'https?://on\.orf\.at/video/(?P\d+)' _TESTS = [{ 'url': 'https://on.orf.at/video/14210000/school-of-champions-48', 'info_dict': { @@ -583,32 +584,55 @@ class ORFONIE(InfoExtractor): 'timestamp': 1706472362, 'upload_date': '20240128', } + }, { + 'url': 'https://on.orf.at/video/3220355', + 'md5': 'f94d98e667cf9a3851317efb4e136662', + 'info_dict': { + 'id': '3220355', + 'ext': 'mp4', + 'duration': 445.04, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png', + 'title': '50 Jahre Burgenland: Der Festumzug', + 'description': 'md5:1560bf855119544ee8c4fa5376a2a6b0', + 'media_type': 'episode', + 'timestamp': 52916400, + 'upload_date': '19710905', + } }] - def _extract_video(self, video_id, display_id): + def _extract_video(self, video_id): encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() api_json = self._download_json( - f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id) + f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id) + + if traverse_obj(api_json, 'is_drm_protected'): + self.report_drm(video_id) formats, 
subtitles = [], {} for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)): for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})): if manifest_type == 'hls': fmts, subs = self._extract_m3u8_formats_and_subtitles( - manifest_url, display_id, fatal=False, m3u8_id='hls') + manifest_url, video_id, fatal=False, m3u8_id='hls') elif manifest_type == 'dash': fmts, subs = self._extract_mpd_formats_and_subtitles( - manifest_url, display_id, fatal=False, mpd_id='dash') + manifest_url, video_id, fatal=False, mpd_id='dash') else: continue formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) + for sub_url in traverse_obj(api_json, ( + '_embedded', 'subtitle', + ('xml_url', 'sami_url', 'stl_url', 'ttml_url', 'srt_url', 'vtt_url'), {url_or_none})): + self._merge_subtitles({'de': [{'url': sub_url}]}, target=subtitles) + return { 'id': video_id, 'formats': formats, 'subtitles': subtitles, **traverse_obj(api_json, { + 'age_limit': ('age_classification', {parse_age_limit}), 'duration': ('duration_second', {float_or_none}), 'title': (('title', 'headline'), {str}), 'description': (('description', 'teaser_text'), {str}), @@ -617,14 +641,14 @@ def _extract_video(self, video_id, display_id): } def _real_extract(self, url): - video_id, display_id = self._match_valid_url(url).group('id', 'slug') - webpage = self._download_webpage(url, display_id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) return { 'id': video_id, 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), 'description': self._html_search_meta( ['description', 'og:description', 'twitter:description'], webpage, default=None), - **self._search_json_ld(webpage, display_id, fatal=False), - **self._extract_video(video_id, display_id), + **self._search_json_ld(webpage, video_id, fatal=False), + **self._extract_video(video_id), } From 5bbfdb7c999b22f1aeca0c3489c167d6eb73013b Mon Sep 17 
00:00:00 2001 From: BohwaZ Date: Thu, 23 May 2024 06:30:21 +0200 Subject: [PATCH 016/145] [ie/HearThisAt] Improve `_VALID_URL` (#9949) Closes #9755 Authored by: bohwaz, seproDev Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- yt_dlp/extractor/hearthisat.py | 44 ++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py index d1a400d8ccbb..c7da8f97dee0 100644 --- a/yt_dlp/extractor/hearthisat.py +++ b/yt_dlp/extractor/hearthisat.py @@ -7,13 +7,14 @@ class HearThisAtIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P[^/]+)/(?P[A-Za-z0-9\-]+)/?$' + _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/?#]+)/(?P<title>[\w.-]+)' _PLAYLIST_URL = 'https://hearthis.at/playlist.php' _TESTS = [{ 'url': 'https://hearthis.at/moofi/dr-kreep', 'md5': 'ab6ec33c8fed6556029337c7885eb4e0', 'info_dict': { 'id': '150939', + 'display_id': 'moofi - dr-kreep', 'ext': 'wav', 'title': 'Moofi - Dr. 
Kreep', 'thumbnail': r're:^https?://.*\.jpg$', @@ -21,15 +22,16 @@ class HearThisAtIE(InfoExtractor): 'description': 'md5:1adb0667b01499f9d27e97ddfd53852a', 'upload_date': '20150118', 'view_count': int, - 'duration': 71, - 'genre': 'Experimental', - } + 'duration': 70, + 'genres': ['Experimental'], + }, }, { # 'download' link redirects to the original webpage 'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/', 'md5': '5980ceb7c461605d30f1f039df160c6e', 'info_dict': { 'id': '811296', + 'display_id': 'twitchsf - dj-jim-hopkins-totally-bitchin-80s-dance-mix', 'ext': 'mp3', 'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!', 'description': 'md5:ef26815ca8f483272a87b137ff175be2', @@ -38,7 +40,39 @@ class HearThisAtIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'view_count': int, 'duration': 4360, - 'genre': 'Dance', + 'genres': ['Dance'], + }, + }, { + 'url': 'https://hearthis.at/tindalos/0001-tindalos-gnrique/eQd/', + 'md5': 'cd08e51911f147f6da2d9678905b0bd9', + 'info_dict': { + 'id': '2685222', + 'ext': 'mp3', + 'duration': 86, + 'view_count': int, + 'timestamp': 1545471670, + 'display_id': 'tindalos - 0001-tindalos-gnrique', + 'thumbnail': r're:^https?://.*\.jpg$', + 'genres': ['Other'], + 'title': 'Tindalos - Tindalos - générique n°1', + 'description': '', + 'upload_date': '20181222', + }, + }, { + 'url': 'https://hearthis.at/sithi2/biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011/', + 'md5': 'b45ac60f0c8111eef6ddc10ec232e312', + 'info_dict': { + 'id': '7145959', + 'ext': 'mp3', + 'description': 'md5:d7ae36a453d78903f6b7ed6eb2fce1f2', + 'duration': 8986, + 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'md5:62669ce5b1b67f45c6f846033f37d3b9', + 'timestamp': 1588699409, + 'display_id': 'sithi2 - biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011', + 'view_count': int, + 'upload_date': '20200505', + 'genres': ['Other'], }, }] 
From eead3bbc01f6529862bdad1f0b2adeabda4f006e Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Thu, 23 May 2024 16:25:16 +0000 Subject: [PATCH 017/145] [ie/brilliantpala] Fix login (#9788) Closes #9771 Authored by: pzhlkj6612 --- yt_dlp/extractor/brilliantpala.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/brilliantpala.py b/yt_dlp/extractor/brilliantpala.py index 0bf8622c1d7c..950a70a5e1c3 100644 --- a/yt_dlp/extractor/brilliantpala.py +++ b/yt_dlp/extractor/brilliantpala.py @@ -27,8 +27,17 @@ def _get_logged_in_username(self, url, video_id): r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username') def _perform_login(self, username, password): - login_form = self._hidden_inputs(self._download_webpage( - self._LOGIN_API, None, 'Downloading login page')) + login_page, urlh = self._download_webpage_handle( + self._LOGIN_API, None, 'Downloading login page', expected_status=401) + if urlh.status != 401 and not urlh.url.startswith(self._LOGIN_API): + self.write_debug('Cookies are valid, no login required.') + return + + if urlh.status == 401: + self.write_debug('Got HTTP Error 401; cookies have been invalidated') + login_page = self._download_webpage(self._LOGIN_API, None, 'Re-downloading login page') + + login_form = self._hidden_inputs(login_page) login_form.update({ 'username': username, 'password': password, From 82f4f4444e26daf35b7302c406fe2312f78f619e Mon Sep 17 00:00:00 2001 From: kclauhk <78251477+kclauhk@users.noreply.github.com> Date: Fri, 24 May 2024 00:26:24 +0800 Subject: [PATCH 018/145] [ie/reddit] Fix subtitles extraction (#10006) Authored by: kclauhk --- yt_dlp/extractor/reddit.py | 61 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 62f669f35da0..44c0353da636 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py 
@@ -5,9 +5,11 @@ ExtractorError, float_or_none, int_or_none, + parse_qs, traverse_obj, try_get, unescapeHTML, + update_url_query, urlencode_postdata, url_or_none, ) @@ -76,7 +78,7 @@ class RedditIE(InfoExtractor): 'like_count': int, 'dislike_count': int, 'comment_count': int, - 'age_limit': 0, + 'age_limit': 18, 'channel_id': 'u_creepyt0es', }, 'params': { @@ -150,6 +152,51 @@ class RedditIE(InfoExtractor): 'like_count': int, }, 'skip': 'Requires account that has opted-in to the GenZedong subreddit', + }, { + # subtitles in HLS manifest + 'url': 'https://www.reddit.com/r/Unexpected/comments/1cl9h0u/the_insurance_claim_will_be_interesting/', + 'info_dict': { + 'id': 'a2mdj5d57qyc1', + 'ext': 'mp4', + 'display_id': '1cl9h0u', + 'title': 'The insurance claim will be interesting', + 'uploader': 'darrenpauli', + 'channel_id': 'Unexpected', + 'duration': 53, + 'upload_date': '20240506', + 'timestamp': 1714966382, + 'age_limit': 0, + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + 'subtitles': {'en': 'mincount:1'}, + }, + 'params': { + 'skip_download': True, + }, + }, { + # subtitles from caption-url + 'url': 'https://www.reddit.com/r/soccer/comments/1cxwzso/tottenham_1_0_newcastle_united_james_maddison_31/', + 'info_dict': { + 'id': 'xbmj4t3igy1d1', + 'ext': 'mp4', + 'display_id': '1cxwzso', + 'title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'', + 'uploader': 'Woodstovia', + 'channel_id': 'soccer', + 'duration': 30, + 'upload_date': '20240522', + 'timestamp': 1716373798, + 'age_limit': 0, + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + 'subtitles': {'en': 'mincount:1'}, + }, + 'params': { + 'skip_download': True, + 'writesubtitles': True, + }, }, { 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', 'only_matching': True, @@ -197,6 +244,12 @@ def _perform_login(self, username, password): elif not traverse_obj(login, ('json', 'data', 'cookie', {str})): raise ExtractorError('Unable to login, no cookie was 
returned') + def _get_subtitles(self, video_id): + # Fallback if there were no subtitles provided by DASH or HLS manifests + caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt' + if self._is_valid_url(caption_url, video_id, item='subtitles'): + return {'en': [{'url': caption_url}]} + def _real_extract(self, url): host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id') @@ -307,6 +360,10 @@ def add_thumbnail(src): dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' + qs = traverse_obj(parse_qs(hls_playlist_url), { + 'f': ('f', 0, {lambda x: ','.join([x, 'subsAll']) if x else 'hd,subsAll'}), + }) + hls_playlist_url = update_url_query(hls_playlist_url, qs) formats = [{ 'url': unescapeHTML(reddit_video['fallback_url']), @@ -332,7 +389,7 @@ def add_thumbnail(src): 'id': video_id, 'display_id': display_id, 'formats': formats, - 'subtitles': subtitles, + 'subtitles': subtitles or self.extract_subtitles(video_id), 'duration': int_or_none(reddit_video.get('duration')), } From 63b569bc5e7d461753637a20ad84a575adee4c0a Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Thu, 23 May 2024 14:15:56 -0400 Subject: [PATCH 019/145] [ie/taptap] Add extractors (#9776) Closes #9643 Authored by: c-basalt --- yt_dlp/extractor/_extractors.py | 6 + yt_dlp/extractor/taptap.py | 275 ++++++++++++++++++++++++++++++++ 2 files changed, 281 insertions(+) create mode 100644 yt_dlp/extractor/taptap.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9dfa28c4bb4b..dcdd24ce5ec1 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1905,6 +1905,12 @@ from .syfy import SyfyIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE +from .taptap import ( + TapTapMomentIE, + TapTapAppIE, + TapTapAppIntlIE, + TapTapPostIntlIE, +) from 
.tass import TassIE from .tbs import TBSIE from .tbsjp import ( diff --git a/yt_dlp/extractor/taptap.py b/yt_dlp/extractor/taptap.py new file mode 100644 index 000000000000..56f2f0ef4b6c --- /dev/null +++ b/yt_dlp/extractor/taptap.py @@ -0,0 +1,275 @@ +import re +import uuid + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + join_nonempty, + str_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class TapTapBaseIE(InfoExtractor): + _X_UA = 'V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&DS=Android&UID={uuid}&OS=Windows&OSV=10&DT=PC' + _VIDEO_API = 'https://www.taptap.cn/webapiv2/video-resource/v1/multi-get' + _INFO_API = None + _INFO_QUERY_KEY = 'id' + _DATA_PATH = None + _ID_PATH = None + _META_PATH = None + + def _get_api(self, url, video_id, query, **kwargs): + query = {**query, 'X-UA': self._X_UA.format(uuid=uuid.uuid4())} + return self._download_json(url, video_id, query=query, **kwargs)['data'] + + def _extract_video(self, video_id): + video_data = self._get_api(self._VIDEO_API, video_id, query={'video_ids': video_id})['list'][0] + + # h265 playlist contains both h265 and h264 formats + video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any)) + formats = self._extract_m3u8_formats(video_url, video_id, fatal=False) + for format in formats: + if re.search(r'^(hev|hvc|hvt)\d', format.get('vcodec', '')): + format['format_id'] = join_nonempty(format.get('format_id'), 'h265', delim='_') + + return { + 'id': str(video_id), + 'formats': formats, + **traverse_obj(video_data, ({ + 'duration': ('info', 'duration', {int_or_none}), + 'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}), + }), get_all=False) + } + + def _real_extract(self, url): + video_id = self._match_id(url) + query = {self._INFO_QUERY_KEY: video_id} + + data = traverse_obj( + self._get_api(self._INFO_API, video_id, query=query), self._DATA_PATH) + + metainfo = traverse_obj(data, 
self._META_PATH) + entries = [{ + **metainfo, + **self._extract_video(id) + } for id in set(traverse_obj(data, self._ID_PATH))] + + return self.playlist_result(entries, **metainfo, id=video_id) + + +class TapTapMomentIE(TapTapBaseIE): + _VALID_URL = r'https?://www\.taptap\.cn/moment/(?P<id>\d+)' + _INFO_API = 'https://www.taptap.cn/webapiv2/moment/v3/detail' + _ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id') + _META_PATH = ('moment', { + 'timestamp': ('created_time', {int_or_none}), + 'modified_timestamp': ('edited_time', {int_or_none}), + 'uploader': ('author', 'user', 'name', {str}), + 'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}), + 'title': ('topic', 'title', {str}), + 'description': ('topic', 'summary', {str}), + }) + _TESTS = [{ + 'url': 'https://www.taptap.cn/moment/194618230982052443', + 'info_dict': { + 'id': '194618230982052443', + 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星', + 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda', + 'timestamp': 1633453402, + 'upload_date': '20211005', + 'modified_timestamp': 1633453402, + 'modified_date': '20211005', + 'uploader': '乌酱', + 'uploader_id': '532896', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '2202584', + 'ext': 'mp4', + 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星', + 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda', + 'duration': 66, + 'timestamp': 1633453402, + 'upload_date': '20211005', + 'modified_timestamp': 1633453402, + 'modified_date': '20211005', + 'uploader': '乌酱', + 'uploader_id': '532896', + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.taptap.cn/moment/521630629209573493', + 'info_dict': { + 'id': '521630629209573493', + 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」', + 'description': 'md5:2c81245da864428c904d53ae4ad2182b', + 'timestamp': 1711425600, + 'upload_date': '20240326', + 'modified_timestamp': 1711425600, + 'modified_date': '20240326', + 'uploader': 
'崩坏:星穹铁道', + 'uploader_id': '414732580', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '4006511', + 'ext': 'mp4', + 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」', + 'description': 'md5:2c81245da864428c904d53ae4ad2182b', + 'duration': 173, + 'timestamp': 1711425600, + 'upload_date': '20240326', + 'modified_timestamp': 1711425600, + 'modified_date': '20240326', + 'uploader': '崩坏:星穹铁道', + 'uploader_id': '414732580', + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.taptap.cn/moment/540493587511511299', + 'playlist_count': 2, + 'info_dict': { + 'id': '540493587511511299', + 'title': '中式民俗解谜《纸嫁衣7》、新系列《纸不语》公布!', + 'description': 'md5:d60842350e686ddb242291ddfb8e39c9', + 'timestamp': 1715920200, + 'upload_date': '20240517', + 'modified_timestamp': 1715942225, + 'modified_date': '20240517', + 'uploader': 'TapTap 编辑', + 'uploader_id': '7159244', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + +class TapTapAppIE(TapTapBaseIE): + _VALID_URL = r'https?://www\.taptap\.cn/app/(?P<id>\d+)' + _INFO_API = 'https://www.taptap.cn/webapiv2/app/v4/detail' + _ID_PATH = (('app_videos', 'videos'), ..., 'video_id') + _META_PATH = { + 'title': ('title', {str}), + 'description': ('description', 'text', {str}, {clean_html}), + } + _TESTS = [{ + 'url': 'https://www.taptap.cn/app/168332', + 'info_dict': { + 'id': '168332', + 'title': '原神', + 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', + }, + 'playlist_count': 2, + 'playlist': [{ + 'info_dict': { + 'id': '4058443', + 'ext': 'mp4', + 'title': '原神', + 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', + 'duration': 26, + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }, { + 'info_dict': { + 'id': '4058462', + 'ext': 'mp4', + 'title': '原神', + 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', + 'duration': 295, + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }] + + +class 
TapTapIntlBase(TapTapBaseIE): + _X_UA = 'V=1&PN=WebAppIntl2&LANG=zh_TW&VN_CODE=115&VN=0.1.0&LOC=CN&PLT=PC&DS=Android&UID={uuid}&CURR=&DT=PC&OS=Windows&OSV=NT%208.0.0' + _VIDEO_API = 'https://www.taptap.io/webapiv2/video-resource/v1/multi-get' + + +class TapTapAppIntlIE(TapTapIntlBase): + _VALID_URL = r'https?://www\.taptap\.io/app/(?P<id>\d+)' + _INFO_API = 'https://www.taptap.io/webapiv2/i/app/v5/detail' + _DATA_PATH = 'app' + _ID_PATH = (('app_videos', 'videos'), ..., 'video_id') + _META_PATH = { + 'title': ('title', {str}), + 'description': ('description', 'text', {str}, {clean_html}), + } + _TESTS = [{ + 'url': 'https://www.taptap.io/app/233287', + 'info_dict': { + 'id': '233287', + 'title': '《虹彩六號 M》', + 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '2149708997', + 'ext': 'mp4', + 'title': '《虹彩六號 M》', + 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182', + 'duration': 78, + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }] + + +class TapTapPostIntlIE(TapTapIntlBase): + _VALID_URL = r'https?://www\.taptap\.io/post/(?P<id>\d+)' + _INFO_API = 'https://www.taptap.io/webapiv2/creation/post/v1/detail' + _INFO_QUERY_KEY = 'id_str' + _DATA_PATH = 'post' + _ID_PATH = ((('videos', ...), 'pin_video'), 'video_id') + _META_PATH = { + 'timestamp': ('published_time', {int_or_none}), + 'modified_timestamp': ('edited_time', {int_or_none}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': ('user', 'id', {int}, {str_or_none}), + 'title': ('title', {str}), + 'description': ('list_fields', 'summary', {str}), + } + _TESTS = [{ + 'url': 'https://www.taptap.io/post/571785', + 'info_dict': { + 'id': '571785', + 'title': 'Arknights x Rainbow Six Siege | Event PV', + 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7', + 'timestamp': 1614664951, + 'upload_date': '20210302', + 'modified_timestamp': 1614664951, + 'modified_date': '20210302', + 
'uploader': 'TapTap Editor', + 'uploader_id': '80224473', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '2149491903', + 'ext': 'mp4', + 'title': 'Arknights x Rainbow Six Siege | Event PV', + 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7', + 'duration': 122, + 'timestamp': 1614664951, + 'upload_date': '20210302', + 'modified_timestamp': 1614664951, + 'modified_date': '20210302', + 'uploader': 'TapTap Editor', + 'uploader_id': '80224473', + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }] From 3779f2a307ba3ef1d28e107cdd71b221dfb4eb36 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Thu, 23 May 2024 22:18:20 +0200 Subject: [PATCH 020/145] [ie/ORFTVthek] Remove extractor (#10011) Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/orf.py | 183 +------------------------------- 2 files changed, 3 insertions(+), 181 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index dcdd24ce5ec1..6f0656e0c3ed 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1387,7 +1387,6 @@ ) from .ora import OraTVIE from .orf import ( - ORFTVthekIE, ORFFM4StoryIE, ORFONIE, ORFRadioIE, diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 13561202c6ca..3c837becdb89 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -3,204 +3,24 @@ import re from .common import InfoExtractor -from ..networking import HEADRequest from ..utils import ( - InAdvancePagedList, clean_html, determine_ext, float_or_none, int_or_none, - join_nonempty, make_archive_id, mimetype2ext, orderedSet, parse_age_limit, remove_end, - smuggle_url, strip_jsonp, try_call, - unescapeHTML, unified_strdate, - unsmuggle_url, url_or_none, ) from ..utils.traversal import traverse_obj -class ORFTVthekIE(InfoExtractor): - IE_NAME = 'orf:tvthek' - IE_DESC = 'ORF TVthek' - _VALID_URL = 
r'(?P<url>https?://tvthek\.orf\.at/(?:(?:[^/]+/){2}){1,2}(?P<id>\d+))(/[^/]+/(?P<vid>\d+))?(?:$|[?#])' - - _TESTS = [{ - 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079', - 'info_dict': { - 'id': '14121079', - }, - 'playlist_count': 11, - 'params': {'noplaylist': True} - }, { - 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150', - 'info_dict': { - 'id': '14121079', - }, - 'playlist_count': 1, - 'params': {'playlist_items': '5'} - }, { - 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150', - 'info_dict': { - 'id': '14121079', - }, - 'playlist': [{ - 'info_dict': { - 'id': '15083150', - 'ext': 'mp4', - 'description': 'md5:7be1c485425f5f255a5e4e4815e77d04', - 'thumbnail': 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg', - 'title': 'Umfrage: Welches Tier ist Sebastian Kurz?', - } - }], - 'playlist_count': 1, - 'params': {'noplaylist': True, 'skip_download': 'm3u8'} - }, { - 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', - 'playlist': [{ - 'md5': '2942210346ed779588f428a92db88712', - 'info_dict': { - 'id': '8896777', - 'ext': 'mp4', - 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde', - 'description': 'md5:c1272f0245537812d4e36419c207b67d', - 'duration': 2668, - 'upload_date': '20141208', - }, - }], - 'skip': 'Blocked outside of Austria / Germany', - }, { - 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', - 'info_dict': { - 'id': '7982259', - 'ext': 'mp4', - 'title': 'Best of Ingrid Thurnher', - 'upload_date': '20140527', - 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. 
Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', - }, - 'params': { - 'skip_download': True, # rtsp downloads - }, - 'skip': 'Blocked outside of Austria / Germany', - }, { - 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141', - 'only_matching': True, - }, { - 'url': 'http://tvthek.orf.at/profile/Universum/35429', - 'only_matching': True, - }] - - def _pagefunc(self, url, data_jsb, n, *, image=None): - sd = data_jsb[n] - video_id, title = str(sd['id']), sd['title'] - formats = [] - for fd in sd['sources']: - src = url_or_none(fd.get('src')) - if not src: - continue - format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd) - ext = determine_ext(src) - if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - src, video_id, 'mp4', m3u8_id=format_id, fatal=False, note=f'Downloading {format_id} m3u8 manifest') - if any('/geoprotection' in f['url'] for f in m3u8_formats): - self.raise_geo_restricted() - formats.extend(m3u8_formats) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - src, video_id, f4m_id=format_id, fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - src, video_id, mpd_id=format_id, fatal=False, note=f'Downloading {format_id} mpd manifest')) - else: - formats.append({ - 'format_id': format_id, - 'url': src, - 'protocol': fd.get('protocol'), - }) - - # Check for geoblocking. - # There is a property is_geoprotection, but that's always false - geo_str = sd.get('geoprotection_string') - http_url = next( - (f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])), - None) if geo_str else None - if http_url: - self._request_webpage( - HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking', - errnote=f'This video seems to be blocked outside of {geo_str}. 
You may want to try the streaming-* formats') - - subtitles = {} - for sub in sd.get('subtitles', []): - sub_src = sub.get('src') - if not sub_src: - continue - subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({ - 'url': sub_src, - }) - - upload_date = unified_strdate(sd.get('created_date')) - - thumbnails = [] - preview = sd.get('preview_image_url') - if preview: - thumbnails.append({ - 'id': 'preview', - 'url': preview, - 'preference': 0, - }) - image = sd.get('image_full_url') or image - if image: - thumbnails.append({ - 'id': 'full', - 'url': image, - 'preference': 1, - }) - - yield { - 'id': video_id, - 'title': title, - 'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}), - 'formats': formats, - 'subtitles': subtitles, - 'description': sd.get('description'), - 'duration': int_or_none(sd.get('duration_in_seconds')), - 'upload_date': upload_date, - 'thumbnails': thumbnails, - } - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url) - playlist_id, video_id, base_url = self._match_valid_url(url).group('id', 'vid', 'url') - webpage = self._download_webpage(url, playlist_id) - - data_jsb = self._parse_json( - self._search_regex( - r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2', - webpage, 'playlist', group='json'), - playlist_id, transform_source=unescapeHTML)['playlist']['videos'] - - if not self._yes_playlist(playlist_id, video_id, smuggled_data): - data_jsb = [sd for sd in data_jsb if str(sd.get('id')) == video_id] - - playlist_count = len(data_jsb) - image = self._og_search_thumbnail(webpage) if playlist_count == 1 else None - - page_func = functools.partial(self._pagefunc, base_url, data_jsb, image=image) - return { - '_type': 'playlist', - 'entries': InAdvancePagedList(page_func, playlist_count, 1), - 'id': playlist_id, - } - - class ORFRadioIE(InfoExtractor): IE_NAME = 'orf:radio' @@ -583,6 +403,7 @@ class ORFONIE(InfoExtractor): 'media_type': 'episode', 
'timestamp': 1706472362, 'upload_date': '20240128', + '_old_archive_ids': ['orftvthek 14210000'], } }, { 'url': 'https://on.orf.at/video/3220355', @@ -597,6 +418,7 @@ class ORFONIE(InfoExtractor): 'media_type': 'episode', 'timestamp': 52916400, 'upload_date': '19710905', + '_old_archive_ids': ['orftvthek 3220355'], } }] @@ -631,6 +453,7 @@ def _extract_video(self, video_id): 'id': video_id, 'formats': formats, 'subtitles': subtitles, + '_old_archive_ids': [make_archive_id('ORFTVthek', video_id)], **traverse_obj(api_json, { 'age_limit': ('age_classification', {parse_age_limit}), 'duration': ('duration_second', {float_or_none}), From 90d2da311bbb5dc06f385ee428c7e4590936e995 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 10:01:40 -0500 Subject: [PATCH 021/145] [ie/DiscoveryPlus] Fix dmax.de and related extractors (#10020) Closes #7530 Authored by: bashonly --- yt_dlp/extractor/dplay.py | 43 ++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index 363b4bec9e37..1ecc4baf6799 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -355,12 +355,10 @@ def _download_video_playback_info(self, disco_base, video_id, headers): video_id, headers=headers, data=json.dumps({ 'deviceInfo': { 'adBlocker': False, + 'drmSupported': False, }, 'videoId': video_id, - 'wisteriaProperties': { - 'platform': 'desktop', - 'product': self._PRODUCT, - }, + 'wisteriaProperties': {}, }).encode('utf-8'))['data']['attributes']['streaming'] def _real_extract(self, url): @@ -878,10 +876,31 @@ def _update_disco_api_headers(self, headers, disco_base, display_id, realm): }) -class DiscoveryNetworksDeIE(DPlayBaseIE): +class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE): _VALID_URL = 
r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)' _TESTS = [{ + 'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold', + 'info_dict': { + 'id': '4756322', + 'ext': 'mp4', + 'title': 'German Gold', + 'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6', + 'display_id': 'goldrausch-in-australien/german-gold', + 'episode': 'Episode 1', + 'episode_number': 1, + 'season': 'Season 5', + 'season_number': 5, + 'series': 'Goldrausch in Australien', + 'duration': 2648.0, + 'upload_date': '20230517', + 'timestamp': 1684357500, + 'creators': ['DMAX'], + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg', + 'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', 'info_dict': { 'id': '78867', @@ -901,9 +920,7 @@ class DiscoveryNetworksDeIE(DPlayBaseIE): 'season_number': 1, 'thumbnail': r're:https://.+\.jpg', }, - 'params': { - 'skip_download': True, - }, + 'skip': '404 Not Found', }, { 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', 'only_matching': True, @@ -920,8 +937,14 @@ def _real_extract(self, url): country = 'GB' if domain == 'dplay.co.uk' else 'DE' realm = 'questuk' if country == 'GB' else domain.replace('.', '') return self._get_disco_api_info( - url, '%s/%s' % (programme, alternate_id), - 'sonic-eu1-prod.disco-api.com', realm, country) + url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm}', + 'x-disco-client': 'Alps:HyogaPlayer:0.0.0', + 'Authorization': 
self._get_auth(disco_base, display_id, realm), + }) class DiscoveryPlusShowBaseIE(DPlayBaseIE): From c92e4e625e9e6bbbbf8e3b20c3e7ebe57c16072d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 18:00:33 -0500 Subject: [PATCH 022/145] [ie/tele5] Overhaul extractor (#10024) Closes #3051, Closes #7955, Closes #8501, Closes #9792 Authored by: bashonly --- yt_dlp/extractor/tele5.py | 134 +++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 73 deletions(-) diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py index 72f67e402451..a45537541534 100644 --- a/yt_dlp/extractor/tele5.py +++ b/yt_dlp/extractor/tele5.py @@ -1,89 +1,77 @@ -from .dplay import DPlayIE -from ..compat import compat_urlparse -from ..utils import ( - ExtractorError, - extract_attributes, -) +import functools +from .dplay import DiscoveryPlusBaseIE +from ..utils import join_nonempty +from ..utils.traversal import traverse_obj -class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _GEO_COUNTRIES = ['DE'] + +class Tele5IE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P<parent_slug>[\w-]+)/(?P<slug_a>[\w-]+)(?:/(?P<slug_b>[\w-]+))?' 
_TESTS = [{ - 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416', + # slug_a and slug_b + 'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane', 'info_dict': { - 'id': '1549416', + 'id': '6852024', 'ext': 'mp4', - 'upload_date': '20180814', - 'timestamp': 1534290623, - 'title': 'Pandorum', - }, - 'params': { - 'skip_download': True, + 'title': 'Quarantäne', + 'description': 'md5:6af0373bd0fcc4f13e5d47701903d675', + 'episode': 'Episode 73', + 'episode_number': 73, + 'season': 'Season 4', + 'season_number': 4, + 'series': 'Stargate Atlantis', + 'upload_date': '20240525', + 'timestamp': 1716643200, + 'duration': 2503.2, + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg', + 'creators': ['Tele5'], + 'tags': [], }, - 'skip': 'No longer available: "404 Seite nicht gefunden"', }, { - # jwplatform, nexx unavailable - 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/', + # only slug_a + 'url': 'https://tele5.de/mediathek/inside-out', 'info_dict': { - 'id': 'WJuiOlUp', + 'id': '6819502', 'ext': 'mp4', - 'upload_date': '20200603', - 'timestamp': 1591214400, - 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters', - 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97', + 'title': 'Inside out', + 'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd', + 'series': 'Inside out', + 'upload_date': '20240523', + 'timestamp': 1716494400, + 'duration': 5343.4, + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg', + 'creators': ['Tele5'], + 'tags': [], }, - 'params': { - 'skip_download': True, - }, - 'skip': 'No longer available, redirects to Filme page', }, { - 'url': 'https://tele5.de/mediathek/angel-of-mine/', + # playlist + 'url': 'https://tele5.de/mediathek/schlefaz', 'info_dict': { - 'id': '1252360', - 'ext': 'mp4', - 'upload_date': '20220109', - 'timestamp': 1641762000, - 'title': 'Angel of Mine', - 
'description': 'md5:a72546a175e1286eb3251843a52d1ad7', + 'id': 'mediathek-schlefaz', }, - 'params': { - 'format': 'bestvideo', - }, - }, { - 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/anders-ist-sevda/', - 'only_matching': True, + 'playlist_mincount': 3, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player') - player_info = extract_attributes(player_element) - asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', )) - endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname - source_type = player_info.get('sourcetype') - if source_type: - endpoint = '%s-%s' % (source_type, endpoint) - try: - return self._get_disco_api_info(url, asset_id, endpoint, realm, country) - except ExtractorError as e: - if getattr(e, 'message', '') == 'Missing deviceId in context': - self.report_drm(video_id) - raise + parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b') + playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-') + + query = {'environment': 'tele5', 'v': '2'} + if not slug_b: + endpoint = f'page/{slug_a}' + query['parent_slug'] = parent_slug + else: + endpoint = f'videos/{slug_b}' + query['filter[show.slug]'] = slug_a + cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', 
playlist_id, query=query) + + return self.playlist_result(map( + functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'), + traverse_obj(cms_data, ('blocks', ..., 'videoId', {str}))), playlist_id) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm}', + 'x-disco-client': 'Alps:HyogaPlayer:0.0.0', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) From 1463945ae5fb05986a0bd1aa02e41d1a08d93a02 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 18:03:05 -0500 Subject: [PATCH 023/145] [ie/jiocinema] Add extractors (#10026) Closes #5563, Closes #7759, Closes #8679, Closes #9349 Authored by: bashonly --- README.md | 3 + yt_dlp/extractor/_extractors.py | 8 +- yt_dlp/extractor/jiocinema.py | 403 ++++++++++++++++++++++++++++++++ yt_dlp/extractor/voot.py | 212 ----------------- 4 files changed, 410 insertions(+), 216 deletions(-) create mode 100644 yt_dlp/extractor/jiocinema.py delete mode 100644 yt_dlp/extractor/voot.py diff --git a/README.md b/README.md index 887cfde2319a..0636d2f6e7ba 100644 --- a/README.md +++ b/README.md @@ -1835,6 +1835,9 @@ The following extractors use this feature: #### nflplusreplay * `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default +#### jiocinema +* `refresh_token`: The `refreshToken` UUID from browser local storage can be passed to extend the life of your login session when logging in with `token` as username and the `accessToken` from browser local storage as password + #### jiosaavn * `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. 
class JioCinemaBaseIE(InfoExtractor):
    """Shared authentication and API plumbing for the JioCinema extractors.

    Session state (tokens, user ID, device ID) is stored on this base class
    rather than on instances so that every JioCinema extractor instance in a
    run shares one consistent session.
    """

    _NETRC_MACHINE = 'jiocinema'
    _GEO_BYPASS = False
    _ACCESS_TOKEN = None
    _REFRESH_TOKEN = None
    _GUEST_TOKEN = None
    _USER_ID = None
    _DEVICE_ID = None
    _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
    _APP_NAME = {'appName': 'RJIL_JioCinema'}
    _APP_VERSION = {'appVersion': '5.0.0'}
    _API_SIGNATURES = 'o668nxgzwff'
    _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
    _ACCESS_HINT = 'the `accessToken` from your browser local storage'
    _LOGIN_HINT = (
        'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, '
        f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. '
        'If you have previously logged in with yt-dlp and your session '
        'has been cached, you can use "-u device -p <DEVICE_ID>"')

    def _cache_token(self, token_type):
        """Persist the access and/or refresh token in the cache, keyed by device ID.

        `token_type` must be one of 'access', 'refresh' or 'all'.
        """
        assert token_type in ('access', 'refresh', 'all')
        if token_type in ('access', 'all'):
            self.cache.store(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN)
        if token_type in ('refresh', 'all'):
            self.cache.store(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN)

    def _call_api(self, url, video_id, note='Downloading API JSON', headers=None, data=None):
        """POST `data` as compact JSON to `url` and return the decoded JSON response.

        HTTP 400, 403 and 474 are passed as expected statuses so that callers
        can inspect the JSON body of those responses. Omitting `data` sends an
        empty JSON object.
        """
        payload = {} if data is None else data
        return self._download_json(
            url, video_id, note, data=json.dumps(payload, separators=(',', ':')).encode(), headers={
                'Content-Type': 'application/json',
                'Accept': 'application/json',
                **self._API_HEADERS,
                **(headers or {}),
            }, expected_status=(400, 403, 474))

    def _call_auth_api(self, service, endpoint, note, headers=None, data=None):
        """Call `endpoint` of the given auth `service` ('token', 'user', ...)."""
        return self._call_api(
            f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
            None, note=note, headers=headers, data=data)

    def _refresh_token(self):
        """Exchange the refresh token for a new access token and cache the result.

        Raises ExtractorError when no refresh token or device ID is available.
        """
        if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID:
            raise ExtractorError('User token has expired', expected=True)
        response = self._call_auth_api(
            'token', 'refreshtoken', 'Refreshing token',
            headers={'accesstoken': self._ACCESS_TOKEN}, data={
                **self._APP_NAME,
                'deviceId': self._DEVICE_ID,
                'refreshToken': self._REFRESH_TOKEN,
                **self._APP_VERSION,
            })
        refresh_token = response.get('refreshTokenId')
        # Only rewrite the cached refresh token when the response carries a new one
        if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
            JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
            self._cache_token('refresh')
        JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
        self._cache_token('access')

    def _fetch_guest_token(self):
        """Create a random 10-digit device ID and request a guest token for it."""
        JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
        guest_token = self._call_auth_api(
            'token', 'guest', 'Downloading guest token', data={
                **self._APP_NAME,
                'deviceType': 'phone',
                'os': 'ios',
                'deviceId': self._DEVICE_ID,
                'freshLaunch': False,
                'adId': self._DEVICE_ID,
                **self._APP_VERSION,
            })
        # Store the token and user ID on the class, next to the device ID they
        # were requested with. Keeping them on the instance let a second
        # extractor instance replace the shared device ID while this instance
        # kept using a guest token requested for the previous one.
        JioCinemaBaseIE._GUEST_TOKEN = guest_token['authToken']
        JioCinemaBaseIE._USER_ID = guest_token['userId']

    def _call_login_api(self, endpoint, guest_token, data, note):
        """Call an OTP login endpoint ('send' or 'verify') using the guest session.

        `guest_token` is the decoded guest JWT payload; its `data.deviceType`
        and `data.os` string values are forwarded as request headers.
        """
        return self._call_auth_api(
            'user', f'loginotp/{endpoint}', note, headers={
                **self.geo_verification_headers(),
                'accesstoken': self._GUEST_TOKEN,
                **self._APP_NAME,
                # A single path: descend into `data`, then pick the two fields.
                # Passing 'data' and the dict as separate arguments makes them
                # alternative paths, which returns the entire `data` mapping.
                **(traverse_obj(guest_token, ('data', {
                    'deviceType': ('deviceType', {str}),
                    'os': ('os', {str}),
                })) or {}),
            }, data=data)

    def _is_token_expired(self, token):
        """Return True if `token` is missing, undecodable, or expires within 180 seconds."""
        expiry = try_call(lambda: jwt_decode_hs256(token)['exp']) or 0
        # Treat the token as expired slightly *before* its `exp` so it is
        # refreshed ahead of time instead of being used after it has lapsed
        return expiry <= int(time.time() + 180)

    def _perform_login(self, username, password):
        """Establish a user session.

        The login mode is selected by `username` (case-insensitive):
        'token' (password is the access token), 'device' (password is a
        previously cached device ID) or 'phone' (password is the phone number;
        an OTP is requested interactively). Anything else raises ExtractorError
        with usage instructions.
        """
        if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
            return

        UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
        # Normalize once so every comparison below treats the mode the same way
        login_mode = username.lower()

        if login_mode == 'token':
            if try_call(lambda: jwt_decode_hs256(password)):
                JioCinemaBaseIE._ACCESS_TOKEN = password
                refresh_hint = 'the `refreshToken` UUID from your browser local storage'
                refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
                if not refresh_token:
                    self.to_screen(
                        'To extend the life of your login session, in addition to your access token, '
                        'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
                        f'where REFRESH_TOKEN is {refresh_hint}')
                elif re.fullmatch(UUID_RE, refresh_token):
                    JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
                else:
                    self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')
            else:
                raise ExtractorError(
                    f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)

        elif login_mode == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
            JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
            JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
            if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN:
                raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)

        elif login_mode == 'phone' and re.fullmatch(r'\+?\d+', password):
            self._fetch_guest_token()
            guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
            initial_data = {
                'number': base64.b64encode(password.encode()).decode(),
                **self._APP_VERSION,
            }
            response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
            if not traverse_obj(response, ('OTPInfo', {dict})):
                raise ExtractorError('There was a problem with the phone number login attempt')

            # The login headers read `os` from the token's `data` claim, so look
            # there first; the top-level key is kept as a fallback
            is_iphone = traverse_obj(guest_token, ('data', 'os'), 'os') == 'ios'
            response = self._call_login_api('verify', guest_token, {
                'deviceInfo': {
                    'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
                    'info': {
                        'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
                        'androidId': self._DEVICE_ID,
                        'type': 'iOS' if is_iphone else 'Android',
                    },
                },
                **initial_data,
                'otp': self._get_tfa_info('the one-time password sent to your phone'),
            }, 'Submitting OTP')
            if traverse_obj(response, 'code') == 1043:
                raise ExtractorError('Wrong OTP', expected=True)
            JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
            JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']

        else:
            raise ExtractorError(self._LOGIN_HINT, expected=True)

        # From here on the session identity comes from the access token itself
        user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
        JioCinemaBaseIE._USER_ID = user_token['userId']
        JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']
        if JioCinemaBaseIE._REFRESH_TOKEN and login_mode != 'device':
            self._cache_token('all')
            if self.get_param('cachedir') is not False:
                self.to_screen(
                    f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
        elif not JioCinemaBaseIE._REFRESH_TOKEN:
            # Token login without a refresh token: try one cached for this device
            JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
            if JioCinemaBaseIE._REFRESH_TOKEN:
                self._cache_token('access')
        self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
        if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
            self._refresh_token()
+ 'params': {'skip_download': 'm3u8'}, + }] + + def _extract_formats_and_subtitles(self, playback, video_id): + m3u8_url = traverse_obj(playback, ( + 'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any)) + if not m3u8_url: # DRM-only content only serves dash urls + self.report_drm(video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls') + self._remove_duplicate_formats(formats) + + return { + # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p + 'formats': traverse_obj(formats, ( + lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)), + 'subtitles': subtitles, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN): + self._fetch_guest_token() + elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN): + self._refresh_token() + + playback = self._call_api( + f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id, + 'Downloading playback JSON', headers={ + **self.geo_verification_headers(), + 'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN, + **self._APP_NAME, + 'deviceid': self._DEVICE_ID, + 'uniqueid': self._USER_ID, + 'x-apisignatures': self._API_SIGNATURES, + 'x-platform': 'androidweb', + 'x-platform-token': 'web', + }, data={ + '4k': False, + 'ageGroup': '18+', + 'appVersion': '3.4.0', + 'bitrateProfile': 'xhdpi', + 'capability': { + 'drmCapability': { + 'aesSupport': 'yes', + 'fairPlayDrmSupport': 'none', + 'playreadyDrmSupport': 'none', + 'widevineDRMSupport': 'none' + }, + 'frameRateCapability': [{ + 'frameRateSupport': '30fps', + 'videoQuality': '1440p' + }] + }, + 'continueWatchingRequired': False, + 'dolby': False, + 'downloadRequest': False, + 'hevc': False, + 'kidsSafe': False, + 'manufacturer': 'Windows', + 'model': 'Windows', + 'multiAudioRequired': True, + 'osVersion': '10', + 
'parentalPinValid': True, + 'x-apisignatures': self._API_SIGNATURES + }) + + status_code = traverse_obj(playback, ('code', {int})) + if status_code == 474: + self.raise_geo_restricted(countries=['IN']) + elif status_code == 1008: + error_msg = 'This content is only available for premium users' + if self._ACCESS_TOKEN: + raise ExtractorError(error_msg, expected=True) + self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None) + elif status_code == 400: + raise ExtractorError('The requested content is not available', expected=True) + elif status_code is not None and status_code != 200: + raise ExtractorError( + f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}') + + metadata = self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details', + video_id, fatal=False, query={ + 'ids': f'include:{video_id}', + 'responseType': 'common', + 'devicePlatformType': 'desktop', + }) + + return { + 'id': video_id, + 'http_headers': self._API_HEADERS, + **self._extract_formats_and_subtitles(playback, video_id), + **traverse_obj(playback, ('data', { + # fallback metadata + 'title': ('name', {str}), + 'description': ('fullSynopsis', {str}), + 'series': ('show', 'name', {str}, {lambda x: x or None}), + 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}), + 'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}), + 'episode': ('fullTitle', {str}), + 'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}), + 'age_limit': ('ageNemonic', {parse_age_limit}), + 'duration': ('totalDuration', {float_or_none}), + 'thumbnail': ('images', {url_or_none}), + })), + **traverse_obj(metadata, ('result', 0, { + 'title': ('fullTitle', {str}), + 'description': ('fullSynopsis', {str}), + 'series': ('showName', {str}, {lambda x: x or None}), + 'season': ('seasonName', {str}, {lambda x: x or None}), + 'season_number': ('season', {int_or_none}), + 
'season_id': ('seasonId', {str}, {lambda x: x or None}), + 'episode': ('fullTitle', {str}), + 'episode_number': ('episode', {int_or_none}), + 'timestamp': ('uploadTime', {int_or_none}), + 'release_date': ('telecastDate', {str}), + 'age_limit': ('ageNemonic', {parse_age_limit}), + 'duration': ('duration', {float_or_none}), + 'genres': ('genres', ..., {str}), + 'thumbnail': ('seo', 'ogImage', {url_or_none}), + })), + } + + +class JioCinemaSeriesIE(JioCinemaBaseIE): + IE_NAME = 'jiocinema:series' + _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})' + _TESTS = [{ + 'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917', + 'info_dict': { + 'id': '3499917', + 'title': 'naagin', + }, + 'playlist_mincount': 120, + }] + + def _entries(self, series_id): + seasons = self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id, + 'Downloading series metadata JSON', query={ + 'sort': 'season:asc', + 'id': series_id, + 'responseType': 'common', + }) + + for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1): + season_id = season['id'] + label = season.get('season') or season_num + for page_num in itertools.count(1): + episodes = traverse_obj(self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode', + season_id, f'Downloading season {label} page {page_num} JSON', query={ + 'sort': 'episode:asc', + 'id': season_id, + 'responseType': 'common', + 'page': page_num, + }), ('result', lambda _, v: v['id'] and url_or_none(v['slug']))) + if not episodes: + break + for episode in episodes: + yield self.url_result( + episode['slug'], JioCinemaIE, **traverse_obj(episode, { + 'video_id': 'id', + 'video_title': ('fullTitle', {str}), + 'season_number': ('season', {int_or_none}), + 'episode_number': ('episode', {int_or_none}), + })) + + def _real_extract(self, url): + slug, series_id = 
self._match_valid_url(url).group('slug', 'id') + return self.playlist_result(self._entries(series_id), series_id, slug) diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py deleted file mode 100644 index ef77bedd27ef..000000000000 --- a/yt_dlp/extractor/voot.py +++ /dev/null @@ -1,212 +0,0 @@ -import json -import time -import uuid - -from .common import InfoExtractor -from ..compat import compat_str -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - jwt_decode_hs256, - parse_age_limit, - traverse_obj, - try_call, - try_get, - unified_strdate, -) - - -class VootBaseIE(InfoExtractor): - _NETRC_MACHINE = 'voot' - _GEO_BYPASS = False - _LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to login with auth token.' - _TOKEN = None - _EXPIRY = 0 - _API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'} - - def _perform_login(self, username, password): - if self._TOKEN and self._EXPIRY: - return - - if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)): - VootBaseIE._TOKEN = password - VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp'] - self.report_login() - - # Mobile number as username is not supported - elif not username.isdigit(): - check_username = self._download_json( - 'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({ - 'type': 'email', - 'email': username - }, separators=(',', ':')).encode(), headers={ - **self._API_HEADERS, - 'Content-Type': 'application/json;charset=utf-8', - }, note='Checking username', expected_status=403) - if not traverse_obj(check_username, ('isExist', {bool})): - if traverse_obj(check_username, ('status', 'code', {int})) == 9999: - self.raise_geo_restricted(countries=['IN']) - raise ExtractorError('Incorrect username', expected=True) - auth_token = traverse_obj(self._download_json( - 'https://userauth.voot.com/usersV3/v3/login', 
None, data=json.dumps({ - 'type': 'traditional', - 'deviceId': str(uuid.uuid4()), - 'deviceBrand': 'PC/MAC', - 'data': { - 'email': username, - 'password': password - } - }, separators=(',', ':')).encode(), headers={ - **self._API_HEADERS, - 'Content-Type': 'application/json;charset=utf-8', - }, note='Logging in', expected_status=400), ('data', 'authToken', {dict})) - if not auth_token: - raise ExtractorError('Incorrect password', expected=True) - VootBaseIE._TOKEN = auth_token['accessToken'] - VootBaseIE._EXPIRY = auth_token['expirationTime'] - - else: - raise ExtractorError(self._LOGIN_HINT, expected=True) - - def _check_token_expiry(self): - if int(time.time()) >= self._EXPIRY: - raise ExtractorError('Access token has expired', expected=True) - - def _real_initialize(self): - if not self._TOKEN: - self.raise_login_required(self._LOGIN_HINT, method=None) - self._check_token_expiry() - - -class VootIE(VootBaseIE): - _WORKING = False - _VALID_URL = r'''(?x) - (?: - voot:| - https?://(?:www\.)?voot\.com/? 
- (?: - movies?/[^/]+/| - (?:shows|kids)/(?:[^/]+/){4} - ) - ) - (?P<id>\d{3,}) - ''' - _TESTS = [{ - 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353', - 'info_dict': { - 'id': '441353', - 'ext': 'mp4', - 'title': 'Is this the end of Kamini?', - 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1', - 'timestamp': 1472103000, - 'upload_date': '20160825', - 'series': 'Ishq Ka Rang Safed', - 'season_number': 1, - 'episode': 'Is this the end of Kamini?', - 'episode_number': 340, - 'release_date': '20160825', - 'season': 'Season 1', - 'age_limit': 13, - 'duration': 1146.0, - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925', - 'only_matching': True, - }, { - 'url': 'https://www.voot.com/movies/pandavas-5/424627', - 'only_matching': True, - }, { - 'url': 'https://www.voot.com/movie/fight-club/621842', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - media_info = self._download_json( - 'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id, - query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN}) - - try: - m3u8_url = self._download_json( - 'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id, - 'Downloading playback JSON', data=b'{}', headers={ - **self.geo_verification_headers(), - **self._API_HEADERS, - 'Content-Type': 'application/json;charset=utf-8', - 'platform': 'androidwebdesktop', - 'vootid': video_id, - 'voottoken': self._TOKEN, - })['m3u8'] - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 400: - self._check_token_expiry() - raise - - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls') - self._remove_duplicate_formats(formats) - - return { - 'id': video_id, - # '/_definst_/smil:vod/' m3u8 manifests 
claim to have 720p+ formats but max out at 480p - 'formats': traverse_obj(formats, ( - lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)), - 'http_headers': self._API_HEADERS, - **traverse_obj(media_info, ('result', 0, { - 'title': ('fullTitle', {str}), - 'description': ('fullSynopsis', {str}), - 'series': ('showName', {str}), - 'season_number': ('season', {int_or_none}), - 'episode': ('fullTitle', {str}), - 'episode_number': ('episode', {int_or_none}), - 'timestamp': ('uploadTime', {int_or_none}), - 'release_date': ('telecastDate', {unified_strdate}), - 'age_limit': ('ageNemonic', {parse_age_limit}), - 'duration': ('duration', {float_or_none}), - })), - } - - -class VootSeriesIE(VootBaseIE): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})' - _TESTS = [{ - 'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002', - 'playlist_mincount': 442, - 'info_dict': { - 'id': '100002', - }, - }, { - 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/100003', - 'playlist_mincount': 341, - 'info_dict': { - 'id': '100003', - }, - }] - _SHOW_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/season-by-show?sort=season%3Aasc&id={}&responseType=common' - _SEASON_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/series-wise-episode?sort=episode%3Aasc&id={}&responseType=common&page={:d}' - - def _entries(self, show_id): - show_json = self._download_json(self._SHOW_API.format(show_id), video_id=show_id) - for season in show_json.get('result', []): - page_num = 1 - season_id = try_get(season, lambda x: x['id'], compat_str) - season_json = self._download_json(self._SEASON_API.format(season_id, page_num), - video_id=season_id, - note='Downloading JSON metadata page %d' % page_num) - episodes_json = season_json.get('result', []) - while episodes_json: - page_num += 1 - for episode in episodes_json: - video_id = episode.get('id') - yield self.url_result( - 'voot:%s' % 
video_id, ie=VootIE.ie_key(), video_id=video_id) - episodes_json = self._download_json(self._SEASON_API.format(season_id, page_num), - video_id=season_id, - note='Downloading JSON metadata page %d' % page_num)['result'] - - def _real_extract(self, url): - show_id = self._match_id(url) - return self.playlist_result(self._entries(show_id), playlist_id=show_id) From 0d067e77c3f5527946fb0c22ee1c7011994cba40 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 18:16:17 -0500 Subject: [PATCH 024/145] [ie/dangalplay] Add extractors (#10021) Closes #8258 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/dangalplay.py | 197 ++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 yt_dlp/extractor/dangalplay.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b807728ee38d..973f8c321320 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -453,6 +453,10 @@ DamtomoRecordIE, DamtomoVideoIE, ) +from .dangalplay import ( + DangalPlayIE, + DangalPlaySeasonIE, +) from .daum import ( DaumIE, DaumClipIE, diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py new file mode 100644 index 000000000000..50e4136b578a --- /dev/null +++ b/yt_dlp/extractor/dangalplay.py @@ -0,0 +1,197 @@ +import hashlib +import json +import re +import time + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none +from ..utils.traversal import traverse_obj + + +class DangalPlayBaseIE(InfoExtractor): + _NETRC_MACHINE = 'dangalplay' + _OTV_USER_ID = None + _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _API_BASE = 'https://ottapi.dangalplay.com' + _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from 
https://www.dangalplay.com/main.48ad19e24eb46acccef3.js + _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above + + def _perform_login(self, username, password): + if self._OTV_USER_ID: + return + if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + raise ExtractorError(self._LOGIN_HINT, expected=True) + self._OTV_USER_ID = password + + def _real_initialize(self): + if not self._OTV_USER_ID: + self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None) + + def _extract_episode_info(self, metadata, episode_slug, series_slug): + return { + 'display_id': episode_slug, + 'episode_number': int_or_none(self._search_regex( + r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)), + 'season_number': int_or_none(self._search_regex( + r'season-(\d+)', series_slug, 'season number', default='1')), + 'series': series_slug, + **traverse_obj(metadata, { + 'id': ('content_id', {str}), + 'title': ('display_title', {str}), + 'episode': ('title', {str}), + 'series': ('show_name', {str}, {lambda x: x or None}), + 'series_id': ('catalog_id', {str}), + 'duration': ('duration', {int_or_none}), + 'release_timestamp': ('release_date_uts', {int_or_none}), + }), + } + + def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query={}): + return self._download_json( + f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, + headers={'Accept': 'application/json'}, query={ + 'auth_token': self._AUTH_TOKEN, + 'region': 'IN', + **query, + }) + + +class DangalPlayIE(DangalPlayBaseIE): + IE_NAME = 'dangalplay' + _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<series>[^/?#]+)/(?P<id>(?!episodes)[^/?#]+)/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01', + 'info_dict': { + 'id': '647c61dc1e7171310dcd49b4', + 'ext': 'mp4', + 'release_timestamp': 1262304000, + 'episode_number': 1, + 'episode': 'EP 1 | KITANI MOHABBAT 
HAI SEASON 2', + 'series': 'kitani-mohabbat-hai-season-2', + 'season_number': 2, + 'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2', + 'release_date': '20100101', + 'duration': 2325, + 'season': 'Season 2', + 'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01', + 'series_id': '645c9ea41e717158ca574966', + }, + }, { + 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01', + 'info_dict': { + 'id': '65d31d9ba73b9c3abd14a7f3', + 'ext': 'mp4', + 'episode': 'EP 1 | MILKE BHI HUM NA MILE', + 'release_timestamp': 1708367411, + 'episode_number': 1, + 'season': 'Season 1', + 'title': 'EP 1 | MILKE BHI HUM NA MILE', + 'duration': 156048, + 'release_date': '20240219', + 'season_number': 1, + 'series': 'MILKE BHI HUM NA MILE', + 'series_id': '645c9ea41e717158ca574966', + 'display_id': 'milke-bhi-hum-na-mile-ep-number-01', + }, + }] + + def _generate_api_data(self, data): + catalog_id = data['catalog_id'] + content_id = data['content_id'] + timestamp = str(int(time.time())) + unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY)) + + return json.dumps({ + 'catalog_id': catalog_id, + 'content_id': content_id, + 'category': '', + 'region': 'IN', + 'auth_token': self._AUTH_TOKEN, + 'id': self._OTV_USER_ID, + 'md5': hashlib.md5(unhashed.encode()).hexdigest(), + 'ts': timestamp, + }, separators=(',', ':')).encode() + + def _real_extract(self, url): + series_slug, episode_slug = self._match_valid_url(url).group('series', 'id') + metadata = self._call_api( + f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip', + episode_slug, query={'item_language': ''})['data'] + + try: + details = self._download_json( + f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug, + 'Downloading playback details JSON', headers={ + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }, data=self._generate_api_data(metadata))['data'] + except ExtractorError as e: + if isinstance(e.cause, 
HTTPError) and e.cause.status == 422: + error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} + if error_info.get('code') == '1016': + self.raise_login_required( + f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) + elif msg := error_info.get('message'): + raise ExtractorError(msg) + raise + + m3u8_url = traverse_obj(details, ( + ('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any)) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4') + + return { + 'formats': formats, + 'subtitles': subtitles, + **self._extract_episode_info(metadata, episode_slug, series_slug), + } + + +class DangalPlaySeasonIE(DangalPlayBaseIE): + IE_NAME = 'dangalplay:season' + _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<id>[^/?#]+)(?:/(?P<sub>ep-[^/?#]+)/episodes)?/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1', + 'playlist_mincount': 170, + 'info_dict': { + 'id': 'kitani-mohabbat-hai-season-1', + }, + }, { + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes', + 'playlist_count': 30, + 'info_dict': { + 'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1', + }, + }, { + # 1 season only, series page is season page + 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile', + 'playlist_mincount': 15, + 'info_dict': { + 'id': 'milke-bhi-hum-na-mile', + }, + }] + + def _entries(self, subcategories, series_slug): + for subcategory in subcategories: + data = self._call_api( + f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip', + series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={ + 'order_by': 'asc', + 'status': 'published', + }) + for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])): + episode_slug = ep['friendly_id'] + yield self.url_result( + 
f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}', + DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug)) + + def _real_extract(self, url): + series_slug, subcategory = self._match_valid_url(url).group('id', 'sub') + subcategories = [subcategory] if subcategory else traverse_obj( + self._call_api( + f'catalogs/shows/items/{series_slug}.gzip', series_slug, + 'Downloading season info JSON', query={'item_language': ''}), + ('data', 'subcategories', ..., 'friendly_id', {str})) + + return self.playlist_result( + self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory)) From 3ba8de62d61d782256f5c1e9939a0762039657de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Finn=20R=2E=20G=C3=A4rtner?= <65015656+FinnRG@users.noreply.github.com> Date: Sun, 26 May 2024 01:40:35 +0200 Subject: [PATCH 025/145] [ie/Piapro] Fix extractor (#9311) Closes #9884 Authored by: FinnRG, seproDev --- yt_dlp/extractor/piapro.py | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index 3ae985da2b36..87d912d568d9 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -2,6 +2,8 @@ from ..compat import compat_urlparse from ..utils import ( ExtractorError, + clean_html, + get_element_by_class, parse_duration, parse_filesize, str_to_int, @@ -88,34 +90,22 @@ def _real_extract(self, url): if category_id not in ('1', '2', '21', '22', '23', '24', '25'): raise ExtractorError('The URL does not contain audio.', expected=True) - str_duration, str_filesize = self._search_regex( - r'サイズ:</span>(.+?)/\(([0-9,]+?[KMG]?B))', webpage, 'duration and size', - group=(1, 2), default=(None, None)) - str_viewcount = self._search_regex(r'閲覧数:</span>([0-9,]+)\s+', webpage, 'view count', fatal=False) - - uploader_id, uploader = self._search_regex( - r'<a\s+class="cd_user-name"\s+href="/(.*)">([^<]+)さん<', webpage, 'uploader', - group=(1, 2), 
default=(None, None)) - content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID') - create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp') - - player_webpage = self._download_webpage( - f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}', - video_id, note='Downloading player webpage') + def extract_info(name, description): + return self._search_regex(rf'{name}[::]\s*([\d\s,:/]+)\s*</p>', webpage, description, default=None) return { 'id': video_id, - 'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False), - 'description': self._html_search_regex(r'(?s)<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False), - 'uploader': uploader, - 'uploader_id': uploader_id, - 'timestamp': unified_timestamp(create_date, False), - 'duration': parse_duration(str_duration), - 'view_count': str_to_int(str_viewcount), + 'title': clean_html(get_element_by_class('contents_title', webpage)), + 'description': clean_html(get_element_by_class('contents_description', webpage)), + 'uploader': clean_html(get_element_by_class('contents_creator_txt', webpage)), + 'uploader_id': self._search_regex( + r'<a\s+href="/([^"]+)"', get_element_by_class('contents_creator', webpage), 'uploader id', default=None), + 'timestamp': unified_timestamp(extract_info('投稿日', 'timestamp'), False), + 'duration': parse_duration(extract_info('長さ', 'duration')), + 'view_count': str_to_int(extract_info('閲覧数', 'view count')), 'thumbnail': self._html_search_meta('twitter:image', webpage), - - 'filesize_approx': parse_filesize(str_filesize.replace(',', '')), - 'url': self._search_regex(r'mp3:\s*\'(.*?)\'\}', player_webpage, 'url'), + 'filesize_approx': parse_filesize((extract_info('サイズ', 'size') or '').replace(',', '')), + 'url': self._search_regex(r'\"url\":\s*\"(.*?)\"', webpage, 'url'), 'ext': 'mp3', 'vcodec': 'none', } From a2e9031605d87c469be9ce98dbbdf4960b727338 Mon Sep 17 
00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Sun, 26 May 2024 07:54:17 +0800 Subject: [PATCH 026/145] [ie/XiaoHongShu] Add extractor (#9646) Closes #9529 Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/xiaohongshu.py | 83 +++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 yt_dlp/extractor/xiaohongshu.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 973f8c321320..fc18ead3a94b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2386,6 +2386,7 @@ XHamsterEmbedIE, XHamsterUserIE, ) +from .xiaohongshu import XiaoHongShuIE from .ximalaya import ( XimalayaIE, XimalayaAlbumIE diff --git a/yt_dlp/extractor/xiaohongshu.py b/yt_dlp/extractor/xiaohongshu.py new file mode 100644 index 000000000000..faad9d923591 --- /dev/null +++ b/yt_dlp/extractor/xiaohongshu.py @@ -0,0 +1,83 @@ +import functools + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + int_or_none, + js_to_json, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class XiaoHongShuIE(InfoExtractor): + _VALID_URL = r'https?://www\.xiaohongshu\.com/explore/(?P<id>[\da-f]+)' + IE_DESC = '小红书' + _TESTS = [{ + 'url': 'https://www.xiaohongshu.com/explore/6411cf99000000001300b6d9', + 'md5': '2a87a77ddbedcaeeda8d7eae61b61228', + 'info_dict': { + 'id': '6411cf99000000001300b6d9', + 'ext': 'mp4', + 'uploader_id': '5c31698d0000000007018a31', + 'description': '#今日快乐今日发[话题]# #吃货薯看这里[话题]# #香妃蛋糕[话题]# #小五卷蛋糕[话题]# #新手蛋糕卷[话题]#', + 'title': '香妃蛋糕也太香了吧🔥不需要卷❗️绝对的友好', + 'tags': ['今日快乐今日发', '吃货薯看这里', '香妃蛋糕', '小五卷蛋糕', '新手蛋糕卷'], + 'duration': 101.726, + 'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[a-z0-9]+/[\w]+', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + initial_state = self._search_json( + 
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', display_id, transform_source=js_to_json) + + note_info = traverse_obj(initial_state, ('note', 'noteDetailMap', display_id, 'note')) + video_info = traverse_obj(note_info, ('video', 'media', 'stream', ('h264', 'av1', 'h265'), ...)) + + formats = [] + for info in video_info: + format_info = traverse_obj(info, { + 'fps': ('fps', {int_or_none}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'vcodec': ('videoCodec', {str}), + 'acodec': ('audioCodec', {str}), + 'abr': ('audioBitrate', {int_or_none}), + 'vbr': ('videoBitrate', {int_or_none}), + 'audio_channels': ('audioChannels', {int_or_none}), + 'tbr': ('avgBitrate', {int_or_none}), + 'format': ('qualityType', {str}), + 'filesize': ('size', {int_or_none}), + 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}) + }) + + formats.extend(traverse_obj(info, (('mediaUrl', ('backupUrls', ...)), { + lambda u: url_or_none(u) and {'url': u, **format_info}}))) + + thumbnails = [] + for image_info in traverse_obj(note_info, ('imageList', ...)): + thumbnail_info = traverse_obj(image_info, { + 'height': ('height', {int_or_none}), + 'width': ('width', {int_or_none}), + }) + for thumb_url in traverse_obj(image_info, (('urlDefault', 'urlPre'), {url_or_none})): + thumbnails.append({ + 'url': thumb_url, + **thumbnail_info, + }) + + return { + 'id': display_id, + 'formats': formats, + 'thumbnails': thumbnails, + 'title': self._html_search_meta(['og:title'], webpage, default=None), + **traverse_obj(note_info, { + 'title': ('title', {str}), + 'description': ('desc', {str}), + 'tags': ('tagList', ..., 'name', {str}), + 'uploader_id': ('user', 'userId', {str}), + }), + } From e897bd8292a41999cf51dba91b390db5643c72db Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Sun, 26 May 2024 21:27:21 +0200 Subject: [PATCH 027/145] [misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409) Authored by: bashonly, 
seproDev, Grub4K Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- .github/PULL_REQUEST_TEMPLATE.md | 1 - .github/workflows/core.yml | 2 +- .github/workflows/quick-test.yml | 16 +- .gitignore | 2 +- .pre-commit-config.yaml | 14 + .pre-commit-hatch.yaml | 9 + CONTRIBUTING.md | 77 +- Makefile | 7 +- devscripts/install_deps.py | 12 +- devscripts/run_tests.py | 14 +- pyproject.toml | 156 +++- setup.cfg | 6 - test/test_http_proxy.py | 1 + yt_dlp/extractor/_extractors.py | 1039 ++++++++++++----------- yt_dlp/extractor/abc.py | 4 +- yt_dlp/extractor/abematv.py | 5 +- yt_dlp/extractor/acfun.py | 4 +- yt_dlp/extractor/adn.py | 4 +- yt_dlp/extractor/adobetv.py | 4 +- yt_dlp/extractor/airtv.py | 2 +- yt_dlp/extractor/allstar.py | 1 - yt_dlp/extractor/alphaporno.py | 4 +- yt_dlp/extractor/alura.py | 12 +- yt_dlp/extractor/amara.py | 2 +- yt_dlp/extractor/amp.py | 2 +- yt_dlp/extractor/anchorfm.py | 2 +- yt_dlp/extractor/angel.py | 2 +- yt_dlp/extractor/appleconnect.py | 5 +- yt_dlp/extractor/appletrailers.py | 2 +- yt_dlp/extractor/arnes.py | 2 +- yt_dlp/extractor/atvat.py | 2 +- yt_dlp/extractor/awaan.py | 2 +- yt_dlp/extractor/banbye.py | 4 +- yt_dlp/extractor/bannedvideo.py | 6 +- yt_dlp/extractor/beeg.py | 1 - yt_dlp/extractor/bleacherreport.py | 2 +- yt_dlp/extractor/blogger.py | 2 +- yt_dlp/extractor/bostonglobe.py | 1 - yt_dlp/extractor/boxcast.py | 6 +- yt_dlp/extractor/brainpop.py | 2 +- yt_dlp/extractor/brightcove.py | 4 +- yt_dlp/extractor/cbs.py | 6 +- yt_dlp/extractor/cinetecamilano.py | 1 + yt_dlp/extractor/clippit.py | 4 +- yt_dlp/extractor/common.py | 2 +- yt_dlp/extractor/corus.py | 2 +- yt_dlp/extractor/crackle.py | 2 +- yt_dlp/extractor/cspan.py | 6 +- yt_dlp/extractor/ctsnews.py | 2 +- yt_dlp/extractor/dailymail.py | 2 +- yt_dlp/extractor/damtomo.py | 2 +- yt_dlp/extractor/democracynow.py | 4 +- yt_dlp/extractor/digitalconcerthall.py | 1 - 
yt_dlp/extractor/discoverygo.py | 2 +- yt_dlp/extractor/disney.py | 4 +- yt_dlp/extractor/douyutv.py | 2 +- yt_dlp/extractor/dplay.py | 2 +- yt_dlp/extractor/drtuber.py | 2 +- yt_dlp/extractor/duboku.py | 2 +- yt_dlp/extractor/dvtv.py | 4 +- yt_dlp/extractor/dw.py | 2 +- yt_dlp/extractor/ertgr.py | 4 +- yt_dlp/extractor/europa.py | 2 +- yt_dlp/extractor/euscreen.py | 3 +- yt_dlp/extractor/eyedotv.py | 4 +- yt_dlp/extractor/fancode.py | 8 +- yt_dlp/extractor/faz.py | 2 +- yt_dlp/extractor/fczenit.py | 2 +- yt_dlp/extractor/fifa.py | 1 - yt_dlp/extractor/filmon.py | 4 +- yt_dlp/extractor/gab.py | 2 +- yt_dlp/extractor/gamejolt.py | 2 +- yt_dlp/extractor/gaskrank.py | 1 + yt_dlp/extractor/generic.py | 2 +- yt_dlp/extractor/gettr.py | 2 +- yt_dlp/extractor/gigya.py | 1 - yt_dlp/extractor/glomex.py | 2 +- yt_dlp/extractor/go.py | 10 +- yt_dlp/extractor/godresource.py | 2 +- yt_dlp/extractor/gofile.py | 5 +- yt_dlp/extractor/gotostage.py | 9 +- yt_dlp/extractor/hbo.py | 4 +- yt_dlp/extractor/hearthisat.py | 2 +- yt_dlp/extractor/hketv.py | 2 +- yt_dlp/extractor/hrti.py | 2 +- yt_dlp/extractor/huya.py | 6 +- yt_dlp/extractor/ichinanalive.py | 2 +- yt_dlp/extractor/infoq.py | 4 +- yt_dlp/extractor/iprima.py | 6 +- yt_dlp/extractor/iqiyi.py | 10 +- yt_dlp/extractor/itprotv.py | 3 +- yt_dlp/extractor/itv.py | 9 +- yt_dlp/extractor/iwara.py | 4 +- yt_dlp/extractor/jamendo.py | 2 +- yt_dlp/extractor/japandiet.py | 4 +- yt_dlp/extractor/jove.py | 5 +- yt_dlp/extractor/jstream.py | 2 +- yt_dlp/extractor/kakao.py | 2 +- yt_dlp/extractor/kaltura.py | 8 +- yt_dlp/extractor/kankanews.py | 4 +- yt_dlp/extractor/kuwo.py | 4 +- yt_dlp/extractor/lcp.py | 2 +- yt_dlp/extractor/lecture2go.py | 2 +- yt_dlp/extractor/lecturio.py | 2 +- yt_dlp/extractor/leeco.py | 2 +- yt_dlp/extractor/libraryofcongress.py | 1 - yt_dlp/extractor/lifenews.py | 2 +- yt_dlp/extractor/limelight.py | 2 +- yt_dlp/extractor/linkedin.py | 2 +- yt_dlp/extractor/mainstreaming.py | 3 +- yt_dlp/extractor/manoto.py | 7 
+- yt_dlp/extractor/medaltv.py | 2 +- yt_dlp/extractor/mediaklikk.py | 7 +- yt_dlp/extractor/mediaset.py | 4 +- yt_dlp/extractor/mediasite.py | 5 +- yt_dlp/extractor/microsoftstream.py | 2 +- yt_dlp/extractor/mildom.py | 4 +- yt_dlp/extractor/mit.py | 4 +- yt_dlp/extractor/monstercat.py | 2 +- yt_dlp/extractor/moviepilot.py | 2 +- yt_dlp/extractor/movingimage.py | 2 +- yt_dlp/extractor/msn.py | 2 +- yt_dlp/extractor/n1.py | 2 +- yt_dlp/extractor/naver.py | 2 +- yt_dlp/extractor/nba.py | 2 +- yt_dlp/extractor/nbc.py | 2 +- yt_dlp/extractor/ndr.py | 2 +- yt_dlp/extractor/nfhsnetwork.py | 8 +- yt_dlp/extractor/nhl.py | 2 +- yt_dlp/extractor/ninenews.py | 2 +- yt_dlp/extractor/ninenow.py | 2 +- yt_dlp/extractor/nitter.py | 9 +- yt_dlp/extractor/nobelprize.py | 6 +- yt_dlp/extractor/noz.py | 6 +- yt_dlp/extractor/nuevo.py | 6 +- yt_dlp/extractor/nuvid.py | 2 +- yt_dlp/extractor/nzherald.py | 5 +- yt_dlp/extractor/odkmedia.py | 2 +- yt_dlp/extractor/olympics.py | 5 +- yt_dlp/extractor/onenewsnz.py | 6 +- yt_dlp/extractor/onet.py | 4 +- yt_dlp/extractor/opencast.py | 2 +- yt_dlp/extractor/openrec.py | 2 +- yt_dlp/extractor/ora.py | 1 + yt_dlp/extractor/packtpub.py | 3 +- yt_dlp/extractor/panopto.py | 10 +- yt_dlp/extractor/paramountplus.py | 2 +- yt_dlp/extractor/pbs.py | 4 +- yt_dlp/extractor/pearvideo.py | 2 +- yt_dlp/extractor/peertube.py | 2 +- yt_dlp/extractor/piksel.py | 2 +- yt_dlp/extractor/pladform.py | 4 +- yt_dlp/extractor/platzi.py | 2 +- yt_dlp/extractor/playtvak.py | 2 +- yt_dlp/extractor/pluralsight.py | 2 +- yt_dlp/extractor/polsatgo.py | 2 +- yt_dlp/extractor/pornflip.py | 6 +- yt_dlp/extractor/pornovoisines.py | 2 +- yt_dlp/extractor/prx.py | 11 +- yt_dlp/extractor/puhutv.py | 2 +- yt_dlp/extractor/qingting.py | 1 - yt_dlp/extractor/qqmusic.py | 2 +- yt_dlp/extractor/radiocanada.py | 2 +- yt_dlp/extractor/radiocomercial.py | 2 +- yt_dlp/extractor/radiozet.py | 2 +- yt_dlp/extractor/radlive.py | 4 +- yt_dlp/extractor/rai.py | 4 +- 
yt_dlp/extractor/rbgtum.py | 2 +- yt_dlp/extractor/rcti.py | 4 +- yt_dlp/extractor/rds.py | 4 +- yt_dlp/extractor/redbulltv.py | 2 +- yt_dlp/extractor/reddit.py | 2 +- yt_dlp/extractor/redgifs.py | 2 +- yt_dlp/extractor/redtube.py | 2 +- yt_dlp/extractor/reuters.py | 2 +- yt_dlp/extractor/rmcdecouverte.py | 2 +- yt_dlp/extractor/rte.py | 2 +- yt_dlp/extractor/rtp.py | 9 +- yt_dlp/extractor/rtvcplay.py | 7 +- yt_dlp/extractor/rtvs.py | 1 - yt_dlp/extractor/rutube.py | 2 +- yt_dlp/extractor/rutv.py | 6 +- yt_dlp/extractor/ruutu.py | 2 +- yt_dlp/extractor/safari.py | 1 - yt_dlp/extractor/scrippsnetworks.py | 4 +- yt_dlp/extractor/scte.py | 2 +- yt_dlp/extractor/sendtonews.py | 6 +- yt_dlp/extractor/seznamzpravy.py | 2 +- yt_dlp/extractor/shahid.py | 2 +- yt_dlp/extractor/shemaroome.py | 2 +- yt_dlp/extractor/sixplay.py | 2 +- yt_dlp/extractor/skynewsarabia.py | 2 +- yt_dlp/extractor/sohu.py | 8 +- yt_dlp/extractor/sovietscloset.py | 5 +- yt_dlp/extractor/spankbang.py | 2 +- yt_dlp/extractor/springboardplatform.py | 6 +- yt_dlp/extractor/startv.py | 4 +- yt_dlp/extractor/stitcher.py | 2 +- yt_dlp/extractor/storyfire.py | 2 +- yt_dlp/extractor/streamable.py | 2 +- yt_dlp/extractor/stripchat.py | 2 +- yt_dlp/extractor/sunporno.py | 4 +- yt_dlp/extractor/syfy.py | 2 +- yt_dlp/extractor/tbs.py | 2 +- yt_dlp/extractor/teachable.py | 4 +- yt_dlp/extractor/teachertube.py | 2 +- yt_dlp/extractor/teamcoco.py | 2 +- yt_dlp/extractor/teamtreehouse.py | 2 +- yt_dlp/extractor/ted.py | 5 +- yt_dlp/extractor/tele13.py | 2 +- yt_dlp/extractor/telewebion.py | 1 + yt_dlp/extractor/tempo.py | 2 +- yt_dlp/extractor/tencent.py | 2 +- yt_dlp/extractor/theguardian.py | 2 +- yt_dlp/extractor/theintercept.py | 4 +- yt_dlp/extractor/theplatform.py | 24 +- yt_dlp/extractor/threeqsdn.py | 2 +- yt_dlp/extractor/toypics.py | 3 +- yt_dlp/extractor/triller.py | 2 +- yt_dlp/extractor/trueid.py | 4 +- yt_dlp/extractor/tumblr.py | 2 +- yt_dlp/extractor/turner.py | 12 +- yt_dlp/extractor/tv2.py | 4 +- 
yt_dlp/extractor/tv2hu.py | 2 +- yt_dlp/extractor/tvanouvelles.py | 2 +- yt_dlp/extractor/tvn24.py | 2 +- yt_dlp/extractor/tvp.py | 2 +- yt_dlp/extractor/tvplay.py | 2 +- yt_dlp/extractor/tvplayer.py | 2 +- yt_dlp/extractor/tweakers.py | 2 +- yt_dlp/extractor/twitter.py | 2 +- yt_dlp/extractor/udn.py | 2 +- yt_dlp/extractor/ukcolumn.py | 8 +- yt_dlp/extractor/urplay.py | 4 +- yt_dlp/extractor/usatoday.py | 2 +- yt_dlp/extractor/ustream.py | 4 +- yt_dlp/extractor/ustudio.py | 2 +- yt_dlp/extractor/veo.py | 1 - yt_dlp/extractor/vesti.py | 2 +- yt_dlp/extractor/vevo.py | 2 +- yt_dlp/extractor/vice.py | 4 +- yt_dlp/extractor/vidio.py | 2 +- yt_dlp/extractor/vidlii.py | 2 +- yt_dlp/extractor/vimeo.py | 10 +- yt_dlp/extractor/viu.py | 6 +- yt_dlp/extractor/vk.py | 2 +- yt_dlp/extractor/walla.py | 2 +- yt_dlp/extractor/washingtonpost.py | 1 - yt_dlp/extractor/wdr.py | 4 +- yt_dlp/extractor/weibo.py | 2 +- yt_dlp/extractor/whowatch.py | 4 +- yt_dlp/extractor/wimtv.py | 2 +- yt_dlp/extractor/wppilot.py | 10 +- yt_dlp/extractor/wsj.py | 2 +- yt_dlp/extractor/xhamster.py | 2 +- yt_dlp/extractor/xnxx.py | 2 +- yt_dlp/extractor/xstream.py | 4 +- yt_dlp/extractor/xvideos.py | 2 +- yt_dlp/extractor/xxxymovies.py | 2 +- yt_dlp/extractor/yandexmusic.py | 2 +- yt_dlp/extractor/zapiks.py | 4 +- yt_dlp/extractor/zhihu.py | 2 +- yt_dlp/extractor/zingmp3.py | 2 +- yt_dlp/extractor/zype.py | 2 +- 264 files changed, 1197 insertions(+), 987 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 .pre-commit-hatch.yaml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c4d3e812e2e4..4deee572f416 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -28,7 +28,6 @@ Fixes # ### Before submitting a *pull request* make sure you have: - [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding 
conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests -- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) ### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check all of the following options that apply: - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 70769f967fe7..fdfdebc65d3d 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -53,7 +53,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Install test requirements - run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi + run: python3 ./devscripts/install_deps.py --include test --include curl-cffi - name: Run tests continue-on-error: False run: | diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 24b34911f391..3afb51a308bc 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -15,13 +15,13 @@ jobs: with: python-version: '3.8' - name: Install test requirements - run: python3 ./devscripts/install_deps.py --include dev + run: python3 ./devscripts/install_deps.py --include test - name: Run tests run: | python3 -m yt_dlp -v || true python3 ./devscripts/run_tests.py core - flake8: - name: Linter + check: + name: Code check if: "!contains(github.event.head_commit.message, 'ci skip all')" runs-on: ubuntu-latest steps: @@ -29,9 +29,11 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.8' - - name: Install flake8 - run: python3 
./devscripts/install_deps.py -o --include dev + - name: Install dev dependencies + run: python3 ./devscripts/install_deps.py -o --include static-analysis - name: Make lazy extractors run: python3 ./devscripts/make_lazy_extractors.py - - name: Run flake8 - run: flake8 . + - name: Run ruff + run: ruff check --output-format github . + - name: Run autopep8 + run: autopep8 --diff . diff --git a/.gitignore b/.gitignore index 630c2e01f5b5..db322c4f08f1 100644 --- a/.gitignore +++ b/.gitignore @@ -67,7 +67,7 @@ cookies # Python *.pyc *.pyo -.pytest_cache +.*_cache wine-py2exe/ py2exe.log build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000000..a821eeefb16b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,14 @@ +repos: +- repo: local + hooks: + - id: linter + name: Apply linter fixes + entry: ruff check --fix . + language: system + types: [python] + require_serial: true + - id: format + name: Apply formatting fixes + entry: autopep8 --in-place . + language: system + types: [python] diff --git a/.pre-commit-hatch.yaml b/.pre-commit-hatch.yaml new file mode 100644 index 000000000000..fb7d25e1db54 --- /dev/null +++ b/.pre-commit-hatch.yaml @@ -0,0 +1,9 @@ +repos: +- repo: local + hooks: + - id: fix + name: Apply code fixes + entry: hatch fmt + language: system + types: [python] + require_serial: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c94ec55a69bc..837b600e3178 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -134,18 +134,53 @@ We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-a # DEVELOPER INSTRUCTIONS -Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation). 
+Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases), get them via [the other installation methods](README.md#installation) or directly run it using `python -m yt_dlp`. -To run yt-dlp as a developer, you don't need to build anything either. Simply execute +`yt-dlp` uses [`hatch`](<https://hatch.pypa.io>) as a project management tool. +You can easily install it using [`pipx`](<https://pipx.pypa.io>) via `pipx install hatch`, or else via `pip` or your package manager of choice. Make sure you are using at least version `1.10.0`, otherwise some functionality might not work as expected. - python3 -m yt_dlp +If you plan on contributing to `yt-dlp`, best practice is to start by running the following command: -To run all the available core tests, use: +```shell +$ hatch run setup +``` + +The above command will install a `pre-commit` hook so that required checks/fixes (linting, formatting) will run automatically before each commit. If any code needs to be linted or formatted, then the commit will be blocked and the necessary changes will be made; you should review all edits and re-commit the fixed version. - python3 devscripts/run_tests.py +After this you can use `hatch shell` to enable a virtual environment that has `yt-dlp` and its development dependencies installed. + +In addition, the following script commands can be used to run simple tasks such as linting or testing (without having to run `hatch shell` first): +* `hatch fmt`: Automatically fix linter violations and apply required code formatting changes + * See `hatch fmt --help` for more info +* `hatch test`: Run extractor or core tests + * See `hatch test --help` for more info See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. 
+While it is strongly recommended to use `hatch` for yt-dlp development, if you are unable to do so, alternatively you can manually create a virtual environment and use the following commands: + +```shell +# To only install development dependencies: +$ python -m devscripts.install_deps --include dev + +# Or, for an editable install plus dev dependencies: +$ python -m pip install -e ".[default,dev]" + +# To setup the pre-commit hook: +$ pre-commit install + +# To be used in place of `hatch test`: +$ python -m devscripts.run_tests + +# To be used in place of `hatch fmt`: +$ ruff check --fix . +$ autopep8 --in-place . + +# To only check code instead of applying fixes: +$ ruff check . +$ autopep8 --diff . +``` + If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile). @@ -165,12 +200,16 @@ After you have ensured this site is distributing its content legally, you can fo 1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork) 1. Check out the source code with: - git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git + ```shell + $ git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git + ``` 1. Start a new git branch with - cd yt-dlp - git checkout -b yourextractor + ```shell + $ cd yt-dlp + $ git checkout -b yourextractor + ``` 1. Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`: @@ -217,21 +256,27 @@ After you have ensured this site is distributing its content legally, you can fo # TODO more properties (see yt_dlp/extractor/common.py) } ``` -1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. -1. Run `python3 devscripts/run_tests.py YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. 
If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all` +1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. Also note that when adding a parenthesized import group, the last import in the group must have a trailing comma in order for this formatting to be respected by our code formatter. +1. Run `hatch test YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all` 1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. 1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want. -1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): +1. 
Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions), passes [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started) code checks and is properly formatted: + + ```shell + $ hatch fmt --check + ``` - $ flake8 yt_dlp/extractor/yourextractor.py + You can use `hatch fmt` to automatically fix problems. 1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: - $ git add yt_dlp/extractor/_extractors.py - $ git add yt_dlp/extractor/yourextractor.py - $ git commit -m '[yourextractor] Add extractor' - $ git push origin yourextractor + ```shell + $ git add yt_dlp/extractor/_extractors.py + $ git add yt_dlp/extractor/yourextractor.py + $ git commit -m '[yourextractor] Add extractor' + $ git push origin yourextractor + ``` 1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. diff --git a/Makefile b/Makefile index cef4bc6cb1ac..b8f010086151 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ clean-dist: yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS clean-cache: find . \( \ - -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ + -type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ \) -prune -exec rm -rf {} \; completion-bash: completions/bash/yt-dlp @@ -70,7 +70,8 @@ uninstall: rm -f $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish codetest: - flake8 . + ruff check . + autopep8 --diff . 
test: $(PYTHON) -m pytest @@ -151,7 +152,7 @@ yt-dlp.tar.gz: all --exclude '*.pyo' \ --exclude '*~' \ --exclude '__pycache__' \ - --exclude '.pytest_cache' \ + --exclude '.*_cache' \ --exclude '.git' \ -- \ README.md supportedsites.md Changelog.md LICENSE \ diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py index d33fc637c6fa..d292505458b7 100755 --- a/devscripts/install_deps.py +++ b/devscripts/install_deps.py @@ -42,17 +42,25 @@ def parse_args(): def main(): args = parse_args() project_table = parse_toml(read_file(args.input))['project'] + recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<group_name>[\w-]+)\]') optional_groups = project_table['optional-dependencies'] excludes = args.exclude or [] + def yield_deps(group): + for dep in group: + if mobj := recursive_pattern.fullmatch(dep): + yield from optional_groups.get(mobj.group('group_name'), []) + else: + yield dep + targets = [] if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group targets.extend(project_table['dependencies']) if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group - targets.extend(optional_groups['default']) + targets.extend(yield_deps(optional_groups['default'])) for include in filter(None, map(optional_groups.get, args.include or [])): - targets.extend(include) + targets.extend(yield_deps(include)) targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes] diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py index 6d638a974863..c605aa62cfc8 100755 --- a/devscripts/run_tests.py +++ b/devscripts/run_tests.py @@ -4,6 +4,7 @@ import functools import os import re +import shlex import subprocess import sys from pathlib import Path @@ -18,6 +19,8 @@ def parse_args(): 'test', help='a extractor tests, or one of "core" or "download"', nargs='*') parser.add_argument( '-k', help='run a test matching EXPRESSION. 
Same as "pytest -k"', metavar='EXPRESSION') + parser.add_argument( + '--pytest-args', help='arguments to passthrough to pytest') return parser.parse_args() @@ -26,15 +29,16 @@ def run_tests(*tests, pattern=None, ci=False): run_download = 'download' in tests tests = list(map(fix_test_name, tests)) - arguments = ['pytest', '-Werror', '--tb=short'] + pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '') + arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)] if ci: arguments.append('--color=yes') + if pattern: + arguments.extend(['-k', pattern]) if run_core: arguments.extend(['-m', 'not download']) elif run_download: arguments.extend(['-m', 'download']) - elif pattern: - arguments.extend(['-k', pattern]) else: arguments.extend( f'test/test_download.py::TestDownload::test_{test}' for test in tests) @@ -46,13 +50,13 @@ def run_tests(*tests, pattern=None, ci=False): pass arguments = [sys.executable, '-Werror', '-m', 'unittest'] + if pattern: + arguments.extend(['-k', pattern]) if run_core: print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True) return 1 elif run_download: arguments.append('test.test_download') - elif pattern: - arguments.extend(['-k', pattern]) else: arguments.extend( f'test.test_download.TestDownload.test_{test}' for test in tests) diff --git a/pyproject.toml b/pyproject.toml index 8e3bce4bfc48..96cb368b6d91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,9 +66,16 @@ build = [ "wheel", ] dev = [ - "flake8", - "isort", - "pytest", + "pre-commit", + "yt-dlp[static-analysis]", + "yt-dlp[test]", +] +static-analysis = [ + "autopep8~=2.0", + "ruff~=0.4.4", +] +test = [ + "pytest~=8.1", ] pyinstaller = [ "pyinstaller>=6.3; sys_platform!='darwin'", @@ -126,3 +133,146 @@ artifacts = ["/yt_dlp/extractor/lazy_extractors.py"] [tool.hatch.version] path = "yt_dlp/version.py" pattern = "_pkg_version = '(?P<version>[^']+)'" + +[tool.hatch.envs.default] +features = ["curl-cffi", "default"] 
+dependencies = ["pre-commit"] +path = ".venv" +installer = "uv" + +[tool.hatch.envs.default.scripts] +setup = "pre-commit install --config .pre-commit-hatch.yaml" +yt-dlp = "python -Werror -Xdev -m yt_dlp {args}" + +[tool.hatch.envs.hatch-static-analysis] +detached = true +features = ["static-analysis"] +dependencies = [] # override hatch ruff version +config-path = "pyproject.toml" + +[tool.hatch.envs.hatch-static-analysis.scripts] +format-check = "autopep8 --diff {args:.}" +format-fix = "autopep8 --in-place {args:.}" +lint-check = "ruff check {args:.}" +lint-fix = "ruff check --fix {args:.}" + +[tool.hatch.envs.hatch-test] +features = ["test"] +dependencies = [ + "pytest-randomly~=3.15", + "pytest-rerunfailures~=14.0", + "pytest-xdist[psutil]~=3.5", +] + +[tool.hatch.envs.hatch-test.scripts] +run = "python -m devscripts.run_tests {args}" +run-cov = "echo Code coverage not implemented && exit 1" + +[[tool.hatch.envs.hatch-test.matrix]] +python = [ + "3.8", + "3.9", + "3.10", + "3.11", + "3.12", + "pypy3.8", + "pypy3.9", + "pypy3.10", +] + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +ignore = [ + "E402", # module level import not at top of file + "E501", # line too long + "E731", # do not assign a lambda expression, use a def + "E741", # ambiguous variable name +] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # import order +] + +[tool.ruff.lint.per-file-ignores] +"devscripts/lazy_load_template.py" = ["F401"] +"!yt_dlp/extractor/**.py" = ["I"] + +[tool.ruff.lint.isort] +known-first-party = [ + "bundle", + "devscripts", + "test", +] +relative-imports-order = "closest-to-furthest" + +[tool.autopep8] +max_line_length = 120 +recursive = true +exit-code = true +jobs = 0 +select = [ + "E101", + "E112", + "E113", + "E115", + "E116", + "E117", + "E121", + "E122", + "E123", + "E124", + "E125", + "E126", + "E127", + "E128", + "E129", + "E131", + "E201", + "E202", + "E203", + "E211", + "E221", + "E222", + "E223", + 
"E224", + "E225", + "E226", + "E227", + "E228", + "E231", + "E241", + "E242", + "E251", + "E252", + "E261", + "E262", + "E265", + "E266", + "E271", + "E272", + "E273", + "E274", + "E275", + "E301", + "E302", + "E303", + "E304", + "E305", + "E306", + "E502", + "E701", + "E702", + "E704", + "W391", + "W504", +] + +[tool.pytest.ini_options] +addopts = "-ra -v --strict-markers" +markers = [ + "download", +] diff --git a/setup.cfg b/setup.cfg index aeb4cee58697..340cc3b4d999 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,12 +14,6 @@ remove-duplicate-keys = true remove-unused-variables = true -[tool:pytest] -addopts = -ra -v --strict-markers -markers = - download - - [tox:tox] skipsdist = true envlist = py{38,39,310,311,312},pypy{38,39,310} diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py index c1d7c53f5192..1b21fe78e871 100644 --- a/test/test_http_proxy.py +++ b/test/test_http_proxy.py @@ -93,6 +93,7 @@ class SSLTransport(urllib3.util.ssltransport.SSLTransport): This allows us to chain multiple TLS connections. 
""" + def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False): self.incoming = ssl.MemoryBIO() self.outgoing = ssl.MemoryBIO() diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fc18ead3a94b..e287e04bc196 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1,4 +1,5 @@ # flake8: noqa: F401 +# isort: off from .youtube import ( # Youtube is moved to the top to improve performance YoutubeIE, @@ -24,6 +25,8 @@ YoutubeConsentRedirectIE, ) +# isort: on + from .abc import ( ABCIE, ABCIViewIE, @@ -43,27 +46,33 @@ ) from .academicearth import AcademicEarthCourseIE from .acast import ( - ACastIE, ACastChannelIE, + ACastIE, +) +from .acfun import ( + AcFunBangumiIE, + AcFunVideoIE, +) +from .adn import ( + ADNIE, + ADNSeasonIE, ) -from .acfun import AcFunVideoIE, AcFunBangumiIE -from .adn import ADNIE, ADNSeasonIE from .adobeconnect import AdobeConnectIE from .adobetv import ( + AdobeTVChannelIE, AdobeTVEmbedIE, AdobeTVIE, AdobeTVShowIE, - AdobeTVChannelIE, AdobeTVVideoIE, ) from .adultswim import AdultSwimIE from .aenetworks import ( - AENetworksIE, AENetworksCollectionIE, + AENetworksIE, AENetworksShowIE, - HistoryTopicIE, - HistoryPlayerIE, BiographyIE, + HistoryPlayerIE, + HistoryTopicIE, ) from .aeonco import AeonCoIE from .afreecatv import ( @@ -79,77 +88,85 @@ ) from .airtv import AirTVIE from .aitube import AitubeKZVideoIE +from .aliexpress import AliExpressLiveIE from .aljazeera import AlJazeeraIE +from .allocine import AllocineIE from .allstar import ( AllstarIE, AllstarProfileIE, ) from .alphaporno import AlphaPornoIE +from .alsace20tv import ( + Alsace20TVEmbedIE, + Alsace20TVIE, +) from .altcensored import ( - AltCensoredIE, AltCensoredChannelIE, + AltCensoredIE, ) from .alura import ( + AluraCourseIE, AluraIE, - AluraCourseIE ) from .amadeustv import AmadeusTVIE from .amara import AmaraIE -from .amcnetworks import AMCNetworksIE from .amazon 
import ( - AmazonStoreIE, AmazonReviewsIE, + AmazonStoreIE, ) from .amazonminitv import ( AmazonMiniTVIE, AmazonMiniTVSeasonIE, AmazonMiniTVSeriesIE, ) +from .amcnetworks import AMCNetworksIE from .americastestkitchen import ( AmericasTestKitchenIE, AmericasTestKitchenSeasonIE, ) from .anchorfm import AnchorFMEpisodeIE from .angel import AngelIE +from .antenna import ( + Ant1NewsGrArticleIE, + Ant1NewsGrEmbedIE, + AntennaGrWatchIE, +) from .anvato import AnvatoIE from .aol import AolIE -from .allocine import AllocineIE -from .aliexpress import AliExpressLiveIE -from .alsace20tv import ( - Alsace20TVIE, - Alsace20TVEmbedIE, -) from .apa import APAIE from .aparat import AparatIE from .appleconnect import AppleConnectIE +from .applepodcasts import ApplePodcastsIE from .appletrailers import ( AppleTrailersIE, AppleTrailersSectionIE, ) -from .applepodcasts import ApplePodcastsIE from .archiveorg import ( ArchiveOrgIE, YoutubeWebArchiveIE, ) from .arcpublishing import ArcPublishingIE -from .arkena import ArkenaIE from .ard import ( + ARDIE, ARDBetaMediathekIE, ARDMediathekCollectionIE, - ARDIE, ) +from .arkena import ArkenaIE +from .arnes import ArnesIE from .art19 import ( Art19IE, Art19ShowIE, ) from .arte import ( - ArteTVIE, + ArteTVCategoryIE, ArteTVEmbedIE, + ArteTVIE, ArteTVPlaylistIE, - ArteTVCategoryIE, ) -from .arnes import ArnesIE -from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE +from .asobichannel import ( + AsobiChannelIE, + AsobiChannelTagURLIE, +) from .asobistage import AsobiStageIE from .atresplayer import AtresPlayerIE from .atscaleconf import AtScaleConfEventIE @@ -160,57 +177,60 @@ AudiodraftCustomIE, AudiodraftGenericIE, ) -from .audiomack import AudiomackIE, AudiomackAlbumIE +from .audiomack import ( + AudiomackAlbumIE, + AudiomackIE, +) from .audius import ( AudiusIE, - AudiusTrackIE, AudiusPlaylistIE, AudiusProfileIE, + AudiusTrackIE, ) from .awaan import ( AWAANIE, - AWAANVideoIE, AWAANLiveIE, AWAANSeasonIE, + AWAANVideoIE, ) from 
.axs import AxsIE from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .banbye import ( - BanByeIE, BanByeChannelIE, + BanByeIE, ) from .bandaichannel import BandaiChannelIE from .bandcamp import ( - BandcampIE, BandcampAlbumIE, - BandcampWeeklyIE, + BandcampIE, BandcampUserIE, + BandcampWeeklyIE, ) from .bannedvideo import BannedVideoIE from .bbc import ( - BBCCoUkIE, + BBCIE, BBCCoUkArticleIE, + BBCCoUkIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE, - BBCIE, ) -from .beeg import BeegIE -from .behindkink import BehindKinkIE -from .bellmedia import BellMediaIE from .beatbump import ( - BeatBumpVideoIE, BeatBumpPlaylistIE, + BeatBumpVideoIE, ) from .beatport import BeatportIE +from .beeg import BeegIE +from .behindkink import BehindKinkIE +from .bellmedia import BellMediaIE from .berufetv import BerufeTVIE from .bet import BetIE from .bfi import BFIPlayerIE from .bfmtv import ( BFMTVIE, - BFMTVLiveIE, BFMTVArticleIE, + BFMTVLiveIE, ) from .bibeltv import ( BibelTVLiveIE, @@ -221,37 +241,37 @@ from .bigo import BigoIE from .bild import BildIE from .bilibili import ( - BiliBiliIE, + BilibiliAudioAlbumIE, + BilibiliAudioIE, BiliBiliBangumiIE, - BiliBiliBangumiSeasonIE, BiliBiliBangumiMediaIE, + BiliBiliBangumiSeasonIE, + BilibiliCategoryIE, BilibiliCheeseIE, BilibiliCheeseSeasonIE, - BiliBiliSearchIE, - BilibiliCategoryIE, - BilibiliAudioIE, - BilibiliAudioAlbumIE, - BiliBiliPlayerIE, - BilibiliSpaceVideoIE, - BilibiliSpaceAudioIE, BilibiliCollectionListIE, - BilibiliSeriesListIE, BilibiliFavoritesListIE, - BilibiliWatchlaterIE, + BiliBiliIE, + BiliBiliPlayerIE, BilibiliPlaylistIE, + BiliBiliSearchIE, + BilibiliSeriesListIE, + BilibiliSpaceAudioIE, + BilibiliSpaceVideoIE, + BilibiliWatchlaterIE, BiliIntlIE, BiliIntlSeriesIE, BiliLiveIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( - BitChuteIE, BitChuteChannelIE, + BitChuteIE, ) from .blackboardcollaborate import BlackboardCollaborateIE from 
.bleacherreport import ( - BleacherReportIE, BleacherReportCMSIE, + BleacherReportIE, ) from .blerp import BlerpIE from .blogger import BloggerIE @@ -264,27 +284,27 @@ from .boxcast import BoxCastVideoIE from .bpb import BpbIE from .br import BRIE -from .bravotv import BravoTVIE from .brainpop import ( - BrainPOPIE, - BrainPOPJrIE, BrainPOPELLIE, BrainPOPEspIE, BrainPOPFrIE, + BrainPOPIE, BrainPOPIlIE, + BrainPOPJrIE, ) +from .bravotv import BravoTVIE from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) from .brilliantpala import ( - BrilliantpalaElearnIE, BrilliantpalaClassesIE, + BrilliantpalaElearnIE, ) -from .businessinsider import BusinessInsiderIE from .bundesliga import BundesligaIE from .bundestag import BundestagIE +from .businessinsider import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE @@ -292,40 +312,40 @@ from .caltrans import CaltransIE from .cam4 import CAM4IE from .camdemy import ( + CamdemyFolderIE, CamdemyIE, - CamdemyFolderIE ) from .camfm import ( CamFMEpisodeIE, - CamFMShowIE + CamFMShowIE, ) from .cammodels import CamModelsIE from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE from .canal1 import Canal1IE from .canalalpha import CanalAlphaIE -from .canalplus import CanalplusIE from .canalc2 import Canalc2IE +from .canalplus import CanalplusIE from .caracoltv import CaracolTvPlayIE from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, - CBCPlayerIE, - CBCPlayerPlaylistIE, CBCGemIE, - CBCGemPlaylistIE, CBCGemLiveIE, + CBCGemPlaylistIE, + CBCPlayerIE, + CBCPlayerPlaylistIE, ) from .cbs import ( CBSIE, ParamountPressExpressIE, ) from .cbsnews import ( - CBSNewsEmbedIE, - CBSNewsIE, - CBSLocalIE, CBSLocalArticleIE, + CBSLocalIE, CBSLocalLiveIE, + CBSNewsEmbedIE, + CBSNewsIE, CBSNewsLiveIE, CBSNewsLiveVideoIE, ) @@ -354,12 +374,12 @@ from .cinemax import CinemaxIE from .cinetecamilano import CinetecaMilanoIE from 
.cineverse import ( - CineverseIE, CineverseDetailsIE, + CineverseIE, ) from .ciscolive import ( - CiscoLiveSessionIE, CiscoLiveSearchIE, + CiscoLiveSessionIE, ) from .ciscowebex import CiscoWebexIE from .cjsw import CJSWIE @@ -372,16 +392,13 @@ from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE -from .cnbc import ( - CNBCVideoIE, -) +from .cnbc import CNBCVideoIE from .cnn import ( CNNIE, - CNNBlogsIE, CNNArticleIE, + CNNBlogsIE, CNNIndonesiaIE, ) -from .coub import CoubIE from .comedycentral import ( ComedyCentralIE, ComedyCentralTVIE, @@ -399,44 +416,48 @@ from .condenast import CondeNastIE from .contv import CONtvIE from .corus import CorusIE +from .coub import CoubIE +from .cozytv import CozyTVIE from .cpac import ( CPACIE, CPACPlaylistIE, ) -from .cozytv import CozyTVIE from .cracked import CrackedIE from .crackle import CrackleIE from .craftsy import CraftsyIE from .crooksandliars import CrooksAndLiarsIE from .crowdbunker import ( - CrowdBunkerIE, CrowdBunkerChannelIE, + CrowdBunkerIE, ) from .crtvg import CrtvgIE from .crunchyroll import ( + CrunchyrollArtistIE, CrunchyrollBetaIE, CrunchyrollBetaShowIE, CrunchyrollMusicIE, - CrunchyrollArtistIE, ) -from .cspan import CSpanIE, CSpanCongressIE +from .cspan import ( + CSpanCongressIE, + CSpanIE, +) from .ctsnews import CtsNewsIE from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( - CuriosityStreamIE, CuriosityStreamCollectionsIE, + CuriosityStreamIE, CuriosityStreamSeriesIE, ) from .cwtv import CWTVIE from .cybrary import ( + CybraryCourseIE, CybraryIE, - CybraryCourseIE ) from .dacast import ( - DacastVODIE, DacastPlaylistIE, + DacastVODIE, ) from .dailymail import DailyMailIE from .dailymotion import ( @@ -458,8 +479,8 @@ DangalPlaySeasonIE, ) from .daum import ( - DaumIE, DaumClipIE, + DaumIE, DaumPlaylistIE, DaumUserIE, ) @@ -467,49 +488,69 @@ from .dbtv import DBTVIE from .dctp import DctpTvIE from 
.deezer import ( - DeezerPlaylistIE, DeezerAlbumIE, + DeezerPlaylistIE, ) from .democracynow import DemocracynowIE from .detik import DetikEmbedIE +from .deuxm import ( + DeuxMIE, + DeuxMNewsIE, +) +from .dfb import DFBIE +from .dhm import DHMIE +from .digitalconcerthall import DigitalConcertHallIE +from .digiteka import DigitekaIE +from .discogs import DiscogsReleasePlaylistIE +from .discovery import DiscoveryIE +from .disney import DisneyIE +from .dispeak import DigitallySpeakingIE from .dlf import ( DLFIE, DLFCorpusIE, ) -from .dfb import DFBIE -from .dhm import DHMIE +from .dlive import ( + DLiveStreamIE, + DLiveVODIE, +) from .douyutv import ( DouyuShowIE, DouyuTVIE, ) from .dplay import ( - DPlayIE, - DiscoveryPlusIE, - HGTVDeIE, - GoDiscoveryIE, - TravelChannelIE, + TLCIE, + AmHistoryChannelIE, + AnimalPlanetIE, CookingChannelIE, - HGTVUsaIE, - FoodNetworkIE, - InvestigationDiscoveryIE, DestinationAmericaIE, - AmHistoryChannelIE, - ScienceChannelIE, - DIYNetworkIE, DiscoveryLifeIE, - AnimalPlanetIE, - TLCIE, - MotorTrendIE, - MotorTrendOnDemandIE, - DiscoveryPlusIndiaIE, DiscoveryNetworksDeIE, + DiscoveryPlusIE, + DiscoveryPlusIndiaIE, + DiscoveryPlusIndiaShowIE, DiscoveryPlusItalyIE, DiscoveryPlusItalyShowIE, - DiscoveryPlusIndiaShowIE, + DIYNetworkIE, + DPlayIE, + FoodNetworkIE, GlobalCyclingNetworkPlusIE, + GoDiscoveryIE, + HGTVDeIE, + HGTVUsaIE, + InvestigationDiscoveryIE, + MotorTrendIE, + MotorTrendOnDemandIE, + ScienceChannelIE, + TravelChannelIE, ) -from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE +from .dreisat import DreiSatIE +from .drooble import DroobleIE +from .dropbox import DropboxIE +from .dropout import ( + DropoutIE, + DropoutSeasonIE, +) from .drtuber import DrTuberIE from .drtv import ( DRTVIE, @@ -518,32 +559,21 @@ DRTVSeriesIE, ) from .dtube import DTubeIE -from .dvtv import DVTVIE from .duboku import ( DubokuIE, - DubokuPlaylistIE + DubokuPlaylistIE, ) from .dumpert import DumpertIE -from .deuxm import ( - DeuxMIE, - 
DeuxMNewsIE -) -from .digitalconcerthall import DigitalConcertHallIE -from .discogs import DiscogsReleasePlaylistIE -from .discovery import DiscoveryIE -from .disney import DisneyIE -from .dispeak import DigitallySpeakingIE -from .dropbox import DropboxIE -from .dropout import ( - DropoutSeasonIE, - DropoutIE -) from .duoplay import DuoplayIE +from .dvtv import DVTVIE from .dw import ( DWIE, DWArticleIE, ) -from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE +from .eagleplatform import ( + ClipYouEmbedIE, + EaglePlatformIE, +) from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE from .egghead import ( @@ -567,8 +597,8 @@ from .eporner import EpornerIE from .erocast import ErocastIE from .eroprofile import ( - EroProfileIE, EroProfileAlbumIE, + EroProfileIE, ) from .err import ERRJupiterIE from .ertgr import ( @@ -578,31 +608,33 @@ ) from .espn import ( ESPNIE, - WatchESPNIE, ESPNArticleIE, - FiveThirtyEightIE, ESPNCricInfoIE, + FiveThirtyEightIE, + WatchESPNIE, ) from .ettutv import EttuTvIE -from .europa import EuropaIE, EuroParlWebstreamIE +from .europa import ( + EuropaIE, + EuroParlWebstreamIE, +) from .europeantour import EuropeanTourIE from .eurosport import EurosportIE from .euscreen import EUScreenIE from .expressen import ExpressenIE from .eyedotv import EyedoTVIE from .facebook import ( + FacebookAdsIE, FacebookIE, FacebookPluginsVideoIE, FacebookRedirectURLIE, FacebookReelIE, - FacebookAdsIE, ) -from .fathom import FathomIE from .fancode import ( + FancodeLiveIE, FancodeVodIE, - FancodeLiveIE ) - +from .fathom import FathomIE from .faz import FazIE from .fc2 import ( FC2IE, @@ -612,8 +644,8 @@ from .fczenit import FczenitIE from .fifa import FifaIE from .filmon import ( - FilmOnIE, FilmOnChannelIE, + FilmOnIE, ) from .filmweb import FilmwebIE from .firsttv import FirstTVIE @@ -621,17 +653,17 @@ from .flextv import FlexTVIE from .flickr import FlickrIE from .floatplane import ( - FloatplaneIE, FloatplaneChannelIE, + FloatplaneIE, ) from 
.folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE from .fourtube import ( FourTubeIE, - PornTubeIE, - PornerBrosIE, FuxIE, + PornerBrosIE, + PornTubeIE, ) from .fox import FOXIE from .fox9 import ( @@ -639,8 +671,8 @@ FOX9NewsIE, ) from .foxnews import ( - FoxNewsIE, FoxNewsArticleIE, + FoxNewsIE, FoxNewsVideoIE, ) from .foxsports import FoxSportsIE @@ -648,20 +680,20 @@ from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, - FranceTVSiteIE, FranceTVInfoIE, + FranceTVSiteIE, ) from .freesound import FreesoundIE from .freespeech import FreespeechIE -from .frontendmasters import ( - FrontendMastersIE, - FrontendMastersLessonIE, - FrontendMastersCourseIE -) from .freetv import ( FreeTvIE, FreeTvMoviesIE, ) +from .frontendmasters import ( + FrontendMastersCourseIE, + FrontendMastersIE, + FrontendMastersLessonIE, +) from .fujitv import FujiTVFODPlus7IE from .funimation import ( FunimationIE, @@ -672,17 +704,17 @@ from .funker530 import Funker530IE from .fuyintv import FuyinTVIE from .gab import ( - GabTVIE, GabIE, + GabTVIE, ) from .gaia import GaiaIE from .gamejolt import ( - GameJoltIE, - GameJoltUserIE, + GameJoltCommunityIE, GameJoltGameIE, GameJoltGameSoundtrackIE, - GameJoltCommunityIE, + GameJoltIE, GameJoltSearchIE, + GameJoltUserIE, ) from .gamespot import GameSpotIE from .gamestar import GameStarIE @@ -691,13 +723,17 @@ from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE +from .genericembeds import ( + HTML5MediaEmbedIE, + QuotedHTMLIE, +) from .genius import ( GeniusIE, GeniusLyricsIE, ) from .getcourseru import ( + GetCourseRuIE, GetCourseRuPlayerIE, - GetCourseRuIE ) from .gettr import ( GettrIE, @@ -706,41 +742,45 @@ from .giantbomb import GiantBombIE from .glide import GlideIE from .globalplayer import ( + GlobalPlayerAudioEpisodeIE, + GlobalPlayerAudioIE, GlobalPlayerLiveIE, GlobalPlayerLivePlaylistIE, - GlobalPlayerAudioIE, - 
GlobalPlayerAudioEpisodeIE, - GlobalPlayerVideoIE + GlobalPlayerVideoIE, ) from .globo import ( - GloboIE, GloboArticleIE, + GloboIE, +) +from .glomex import ( + GlomexEmbedIE, + GlomexIE, ) from .gmanetwork import GMANetworkVideoIE from .go import GoIE -from .godtube import GodTubeIE from .godresource import GodResourceIE +from .godtube import GodTubeIE from .gofile import GofileIE from .golem import GolemIE from .goodgame import GoodGameIE from .googledrive import ( - GoogleDriveIE, GoogleDriveFolderIE, + GoogleDriveIE, ) from .googlepodcasts import ( - GooglePodcastsIE, GooglePodcastsFeedIE, + GooglePodcastsIE, ) from .googlesearch import GoogleSearchIE -from .gopro import GoProIE from .goplay import GoPlayIE +from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE from .gronkh import ( - GronkhIE, GronkhFeedIE, - GronkhVodsIE + GronkhIE, + GronkhVodsIE, ) from .groupon import GrouponIE from .harpodeon import HarpodeonIE @@ -749,10 +789,10 @@ from .heise import HeiseIE from .hellporno import HellPornoIE from .hgtv import HGTVComShowIE -from .hketv import HKETVIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitrecord import HitRecordIE +from .hketv import HKETVIE from .hollywoodreporter import ( HollywoodReporterIE, HollywoodReporterPlaylistIE, @@ -761,8 +801,8 @@ from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, - HotStarPrefixIE, HotStarPlaylistIE, + HotStarPrefixIE, HotStarSeasonIE, HotStarSeriesIE, ) @@ -773,34 +813,30 @@ HRTiPlaylistIE, ) from .hse import ( - HSEShowIE, HSEProductIE, -) -from .genericembeds import ( - HTML5MediaEmbedIE, - QuotedHTMLIE, + HSEShowIE, ) from .huajiao import HuajiaoIE -from .huya import HuyaLiveIE from .huffpost import HuffPostIE from .hungama import ( + HungamaAlbumPlaylistIE, HungamaIE, HungamaSongIE, - HungamaAlbumPlaylistIE, ) +from .huya import HuyaLiveIE from .hypem import HypemIE from 
.hypergryph import MonsterSirenHypergryphMusicIE from .hytale import HytaleIE from .icareus import IcareusIE from .ichinanalive import ( - IchinanaLiveIE, IchinanaLiveClipIE, + IchinanaLiveIE, ) from .idolplus import IdolPlusIE from .ign import ( IGNIE, - IGNVideoIE, IGNArticleIE, + IGNVideoIE, ) from .iheart import ( IHeartRadioIE, @@ -810,12 +846,12 @@ from .iltalehti import IltalehtiIE from .imdb import ( ImdbIE, - ImdbListIE + ImdbListIE, ) from .imgur import ( - ImgurIE, ImgurAlbumIE, ImgurGalleryIE, + ImgurIE, ) from .ina import InaIE from .inc import IncIE @@ -824,20 +860,20 @@ from .instagram import ( InstagramIE, InstagramIOSIE, - InstagramUserIE, - InstagramTagIE, InstagramStoryIE, + InstagramTagIE, + InstagramUserIE, ) from .internazionale import InternazionaleIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import ( + IPrimaCNNIE, IPrimaIE, - IPrimaCNNIE ) from .iqiyi import ( - IqiyiIE, + IqAlbumIE, IqIE, - IqAlbumIE + IqiyiIE, ) from .islamchannel import ( IslamChannelIE, @@ -845,16 +881,16 @@ ) from .israelnationalnews import IsraelNationalNewsIE from .itprotv import ( + ITProTVCourseIE, ITProTVIE, - ITProTVCourseIE ) from .itv import ( - ITVIE, ITVBTCCIE, + ITVIE, ) from .ivi import ( + IviCompilationIE, IviIE, - IviCompilationIE ) from .ivideon import IvideonIE from .iwara import ( @@ -865,15 +901,15 @@ from .ixigua import IxiguaIE from .izlesene import IzleseneIE from .jamendo import ( - JamendoIE, JamendoAlbumIE, + JamendoIE, ) from .japandiet import ( + SangiinIE, + SangiinInstructionIE, ShugiinItvLiveIE, ShugiinItvLiveRoomIE, ShugiinItvVodIE, - SangiinInstructionIE, - SangiinIE, ) from .jeuxvideo import JeuxVideoIE from .jiocinema import ( @@ -881,13 +917,13 @@ JioCinemaSeriesIE, ) from .jiosaavn import ( - JioSaavnSongIE, JioSaavnAlbumIE, JioSaavnPlaylistIE, + JioSaavnSongIE, ) -from .jove import JoveIE from .joj import JojIE from .joqrag import JoqrAgIE +from .jove import JoveIE from .jstream import JStreamIE from 
.jtbc import ( JTBCIE, @@ -914,17 +950,17 @@ from .kommunetv import KommunetvIE from .kompas import KompasVideoIE from .koo import KooIE -from .kth import KTHIE from .krasview import KrasViewIE +from .kth import KTHIE from .ku6 import Ku6IE from .kukululive import KukuluLiveIE from .kuwo import ( - KuwoIE, KuwoAlbumIE, - KuwoChartIE, - KuwoSingerIE, KuwoCategoryIE, + KuwoChartIE, + KuwoIE, KuwoMvIE, + KuwoSingerIE, ) from .la7 import ( LA7IE, @@ -944,14 +980,14 @@ ) from .lci import LCIIE from .lcp import ( - LcpPlayIE, LcpIE, + LcpPlayIE, ) from .lecture2go import Lecture2GoIE from .lecturio import ( - LecturioIE, LecturioCourseIE, LecturioDeCourseIE, + LecturioIE, ) from .leeco import ( LeIE, @@ -968,22 +1004,22 @@ from .libraryofcongress import LibraryOfCongressIE from .libsyn import LibsynIE from .lifenews import ( - LifeNewsIE, LifeEmbedIE, + LifeNewsIE, ) from .likee import ( LikeeIE, - LikeeUserIE + LikeeUserIE, ) from .limelight import ( - LimelightMediaIE, LimelightChannelIE, LimelightChannelListIE, + LimelightMediaIE, ) from .linkedin import ( LinkedInIE, - LinkedInLearningIE, LinkedInLearningCourseIE, + LinkedInLearningIE, ) from .liputan6 import Liputan6IE from .listennotes import ListenNotesIE @@ -1000,25 +1036,23 @@ LnkIE, ) from .loom import ( - LoomIE, LoomFolderIE, + LoomIE, ) from .lovehomeporn import LoveHomePornIE from .lrt import ( LRTVODIE, - LRTStreamIE + LRTStreamIE, ) from .lsm import ( LSMLREmbedIE, LSMLTVEmbedIE, - LSMReplayIE -) -from .lumni import ( - LumniIE + LSMReplayIE, ) +from .lumni import LumniIE from .lynda import ( + LyndaCourseIE, LyndaIE, - LyndaCourseIE ) from .maariv import MaarivIE from .magellantv import MagellanTVIE @@ -1030,13 +1064,13 @@ ) from .mainstreaming import MainStreamingIE from .mangomolo import ( - MangomoloVideoIE, MangomoloLiveIE, + MangomoloVideoIE, ) from .manoto import ( ManotoTVIE, - ManotoTVShowIE, ManotoTVLiveIE, + ManotoTVShowIE, ) from .manyvids import ManyVidsIE from .maoritv import MaoriTVIE @@ 
-1052,13 +1086,14 @@ from .medaltv import MedalTVIE from .mediaite import MediaiteIE from .mediaklikk import MediaKlikkIE +from .medialaan import MedialaanIE from .mediaset import ( MediasetIE, MediasetShowIE, ) from .mediasite import ( - MediasiteIE, MediasiteCatalogIE, + MediasiteIE, MediasiteNamedCatalogIE, ) from .mediastream import ( @@ -1068,26 +1103,30 @@ from .mediaworksnz import MediaWorksNZVODIE from .medici import MediciIE from .megaphone import MegaphoneIE +from .megatvcom import ( + MegaTVComEmbedIE, + MegaTVComIE, +) from .meipai import MeipaiIE from .melonvod import MelonVODIE from .metacritic import MetacriticIE from .mgtv import MGTVIE +from .microsoftembed import MicrosoftEmbedIE from .microsoftstream import MicrosoftStreamIE from .microsoftvirtualacademy import ( - MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, + MicrosoftVirtualAcademyIE, ) -from .microsoftembed import MicrosoftEmbedIE from .mildom import ( - MildomIE, - MildomVodIE, MildomClipIE, + MildomIE, MildomUserVodIE, + MildomVodIE, ) from .minds import ( - MindsIE, MindsChannelIE, MindsGroupIE, + MindsIE, ) from .minoto import MinotoIE from .mirrativ import ( @@ -1095,31 +1134,34 @@ MirrativUserIE, ) from .mirrorcouk import MirrorCoUKIE -from .mit import TechTVMITIE, OCWMITIE +from .mit import ( + OCWMITIE, + TechTVMITIE, +) from .mitele import MiTeleIE from .mixch import ( - MixchIE, MixchArchiveIE, + MixchIE, ) from .mixcloud import ( MixcloudIE, - MixcloudUserIE, MixcloudPlaylistIE, + MixcloudUserIE, ) from .mlb import ( MLBIE, - MLBVideoIE, MLBTVIE, MLBArticleIE, + MLBVideoIE, ) from .mlssoccer import MLSSoccerIE from .mocha import MochaVideoIE from .mojvideo import MojvideoIE from .monstercat import MonstercatIE from .motherless import ( - MotherlessIE, - MotherlessGroupIE, MotherlessGalleryIE, + MotherlessGroupIE, + MotherlessIE, MotherlessUploaderIE, ) from .motorsport import MotorsportIE @@ -1129,23 +1171,26 @@ from .movingimage import MovingImageIE from .msn import 
MSNIE from .mtv import ( - MTVIE, - MTVVideoIE, - MTVServicesEmbeddedIE, MTVDEIE, - MTVJapanIE, + MTVIE, MTVItaliaIE, MTVItaliaProgrammaIE, + MTVJapanIE, + MTVServicesEmbeddedIE, + MTVVideoIE, ) from .muenchentv import MuenchenTVIE -from .murrtube import MurrtubeIE, MurrtubeUserIE +from .murrtube import ( + MurrtubeIE, + MurrtubeUserIE, +) from .museai import MuseAIIE from .musescore import MuseScoreIE from .musicdex import ( - MusicdexSongIE, MusicdexAlbumIE, MusicdexArtistIE, MusicdexPlaylistIE, + MusicdexSongIE, ) from .mx3 import ( Mx3IE, @@ -1156,7 +1201,10 @@ MxplayerIE, MxplayerShowIE, ) -from .myspace import MySpaceIE, MySpaceAlbumIE +from .myspace import ( + MySpaceAlbumIE, + MySpaceIE, +) from .myspass import MySpassIE from .myvideoge import MyVideoGeIE from .myvidster import MyVidsterIE @@ -1170,8 +1218,8 @@ NateProgramIE, ) from .nationalgeographic import ( - NationalGeographicVideoIE, NationalGeographicTVIE, + NationalGeographicVideoIE, ) from .naver import ( NaverIE, @@ -1179,12 +1227,12 @@ NaverNowIE, ) from .nba import ( - NBAWatchEmbedIE, - NBAWatchIE, - NBAWatchCollectionIE, - NBAEmbedIE, NBAIE, NBAChannelIE, + NBAEmbedIE, + NBAWatchCollectionIE, + NBAWatchEmbedIE, + NBAWatchIE, ) from .nbc import ( NBCIE, @@ -1198,35 +1246,35 @@ ) from .ndr import ( NDRIE, - NJoyIE, NDREmbedBaseIE, NDREmbedIE, NJoyEmbedIE, + NJoyIE, ) from .ndtv import NDTVIE from .nebula import ( - NebulaIE, + NebulaChannelIE, NebulaClassIE, + NebulaIE, NebulaSubscriptionsIE, - NebulaChannelIE, ) from .nekohacker import NekoHackerIE from .nerdcubed import NerdCubedFeedIE -from .netzkino import NetzkinoIE from .neteasemusic import ( - NetEaseMusicIE, NetEaseMusicAlbumIE, - NetEaseMusicSingerIE, + NetEaseMusicDjRadioIE, + NetEaseMusicIE, NetEaseMusicListIE, NetEaseMusicMvIE, NetEaseMusicProgramIE, - NetEaseMusicDjRadioIE, + NetEaseMusicSingerIE, ) from .netverse import ( NetverseIE, NetversePlaylistIE, NetverseSearchIE, ) +from .netzkino import NetzkinoIE from .newgrounds import ( 
NewgroundsIE, NewgroundsPlaylistIE, @@ -1235,14 +1283,14 @@ from .newspicks import NewsPicksIE from .newsy import NewsyIE from .nextmedia import ( - NextMediaIE, - NextMediaActionNewsIE, AppleDailyIE, + NextMediaActionNewsIE, + NextMediaIE, NextTVIE, ) from .nexx import ( - NexxIE, NexxEmbedIE, + NexxIE, ) from .nfb import ( NFBIE, @@ -1256,43 +1304,43 @@ NFLPlusReplayIE, ) from .nhk import ( - NhkVodIE, - NhkVodProgramIE, NhkForSchoolBangumiIE, - NhkForSchoolSubjectIE, NhkForSchoolProgramListIE, + NhkForSchoolSubjectIE, NhkRadioNewsPageIE, NhkRadiruIE, NhkRadiruLiveIE, + NhkVodIE, + NhkVodProgramIE, ) from .nhl import NHLIE from .nick import ( - NickIE, NickBrIE, NickDeIE, + NickIE, NickRuIE, ) from .niconico import ( + NiconicoHistoryIE, NiconicoIE, + NiconicoLiveIE, NiconicoPlaylistIE, - NiconicoUserIE, NiconicoSeriesIE, - NiconicoHistoryIE, + NiconicoUserIE, NicovideoSearchDateIE, NicovideoSearchIE, NicovideoSearchURLIE, NicovideoTagURLIE, - NiconicoLiveIE, +) +from .niconicochannelplus import ( + NiconicoChannelPlusChannelLivesIE, + NiconicoChannelPlusChannelVideosIE, + NiconicoChannelPlusIE, ) from .ninaprotocol import NinaProtocolIE from .ninecninemedia import ( - NineCNineMediaIE, CPTwentyFourIE, -) -from .niconicochannelplus import ( - NiconicoChannelPlusIE, - NiconicoChannelPlusChannelVideosIE, - NiconicoChannelPlusChannelLivesIE, + NineCNineMediaIE, ) from .ninegag import NineGagIE from .ninenews import NineNewsIE @@ -1317,24 +1365,24 @@ ) from .noz import NozIE from .npo import ( - AndereTijdenIE, NPOIE, + VPROIE, + WNLIE, + AndereTijdenIE, + HetKlokhuisIE, NPOLiveIE, - NPORadioIE, NPORadioFragmentIE, + NPORadioIE, SchoolTVIE, - HetKlokhuisIE, - VPROIE, - WNLIE, ) from .npr import NprIE from .nrk import ( NRKIE, + NRKTVIE, NRKPlaylistIE, + NRKRadioPodkastIE, NRKSkoleIE, - NRKTVIE, NRKTVDirekteIE, - NRKRadioPodkastIE, NRKTVEpisodeIE, NRKTVEpisodesIE, NRKTVSeasonIE, @@ -1346,18 +1394,18 @@ from .ntvde import NTVDeIE from .ntvru import NTVRuIE from 
.nubilesporn import NubilesPornIE -from .nytimes import ( - NYTimesIE, - NYTimesArticleIE, - NYTimesCookingIE, - NYTimesCookingRecipeIE, -) from .nuum import ( NuumLiveIE, - NuumTabIE, NuumMediaIE, + NuumTabIE, ) from .nuvid import NuvidIE +from .nytimes import ( + NYTimesArticleIE, + NYTimesCookingIE, + NYTimesCookingRecipeIE, + NYTimesIE, +) from .nzherald import NZHeraldIE from .nzonscreen import NZOnScreenIE from .nzz import NZZIE @@ -1365,7 +1413,7 @@ from .odnoklassniki import OdnoklassnikiIE from .oftv import ( OfTVIE, - OfTVPlaylistIE + OfTVPlaylistIE, ) from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE @@ -1378,8 +1426,8 @@ from .onenewsnz import OneNewsNZIE from .oneplace import OnePlacePodcastIE from .onet import ( - OnetIE, OnetChannelIE, + OnetIE, OnetMVPIE, OnetPlIE, ) @@ -1389,33 +1437,33 @@ OpencastPlaylistIE, ) from .openrec import ( - OpenRecIE, OpenRecCaptureIE, + OpenRecIE, OpenRecMovieIE, ) from .ora import OraTVIE from .orf import ( - ORFFM4StoryIE, + ORFIPTVIE, ORFONIE, - ORFRadioIE, + ORFFM4StoryIE, ORFPodcastIE, - ORFIPTVIE, + ORFRadioIE, ) from .outsidetv import OutsideTVIE from .owncloud import OwnCloudIE from .packtpub import ( - PacktPubIE, PacktPubCourseIE, + PacktPubIE, ) from .palcomp3 import ( - PalcoMP3IE, PalcoMP3ArtistIE, + PalcoMP3IE, PalcoMP3VideoIE, ) from .panopto import ( PanoptoIE, PanoptoListIE, - PanoptoPlaylistIE + PanoptoPlaylistIE, ) from .paramountplus import ( ParamountPlusIE, @@ -1424,12 +1472,18 @@ from .parler import ParlerIE from .parlview import ParlviewIE from .patreon import ( + PatreonCampaignIE, PatreonIE, - PatreonCampaignIE ) -from .pbs import PBSIE, PBSKidsIE +from .pbs import ( + PBSIE, + PBSKidsIE, +) from .pearvideo import PearVideoIE -from .peekvids import PeekVidsIE, PlayVidsIE +from .peekvids import ( + PeekVidsIE, + PlayVidsIE, +) from .peertube import ( PeerTubeIE, PeerTubePlaylistIE, @@ -1437,7 +1491,7 @@ from .peertv import PeerTVIE from .peloton import ( 
PelotonIE, - PelotonLiveIE + PelotonLiveIE, ) from .performgroup import PerformGroupIE from .periscope import ( @@ -1457,8 +1511,8 @@ from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pinterest import ( - PinterestIE, PinterestCollectionIE, + PinterestIE, ) from .pixivsketch import ( PixivSketchIE, @@ -1467,19 +1521,22 @@ from .pladform import PladformIE from .planetmarathi import PlanetMarathiIE from .platzi import ( - PlatziIE, PlatziCourseIE, + PlatziIE, ) from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE from .playwire import PlaywireIE -from .plutotv import PlutoTVIE from .pluralsight import ( - PluralsightIE, PluralsightCourseIE, + PluralsightIE, +) +from .plutotv import PlutoTVIE +from .podbayfm import ( + PodbayFMChannelIE, + PodbayFMIE, ) -from .podbayfm import PodbayFMIE, PodbayFMChannelIE from .podchaser import PodchaserIE from .podomatic import PodomaticIE from .pokemon import ( @@ -1487,15 +1544,15 @@ PokemonWatchIE, ) from .pokergo import ( - PokerGoIE, PokerGoCollectionIE, + PokerGoIE, ) from .polsatgo import PolsatGoIE from .polskieradio import ( - PolskieRadioIE, - PolskieRadioLegacyIE, PolskieRadioAuditionIE, PolskieRadioCategoryIE, + PolskieRadioIE, + PolskieRadioLegacyIE, PolskieRadioPlayerIE, PolskieRadioPodcastIE, PolskieRadioPodcastListIE, @@ -1506,57 +1563,62 @@ from .pornflip import PornFlipIE from .pornhub import ( PornHubIE, - PornHubUserIE, - PornHubPlaylistIE, PornHubPagedVideoListIE, + PornHubPlaylistIE, + PornHubUserIE, PornHubUserVideosUploadIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE -from .puhutv import ( - PuhuTVIE, - PuhuTVSerieIE, -) from .pr0gramm import Pr0grammIE -from .prankcast import PrankCastIE, PrankCastPostIE +from .prankcast import ( + PrankCastIE, + PrankCastPostIE, +) from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import 
ProjectVeritasIE from .prosiebensat1 import ProSiebenSat1IE from .prx import ( - PRXStoryIE, - PRXSeriesIE, PRXAccountIE, + PRXSeriesIE, + PRXSeriesSearchIE, PRXStoriesSearchIE, - PRXSeriesSearchIE + PRXStoryIE, +) +from .puhutv import ( + PuhuTVIE, + PuhuTVSerieIE, ) from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .qdance import QDanceIE from .qingting import QingTingIE from .qqmusic import ( + QQMusicAlbumIE, QQMusicIE, + QQMusicPlaylistIE, QQMusicSingerIE, - QQMusicAlbumIE, QQMusicToplistIE, - QQMusicPlaylistIE, ) from .r7 import ( R7IE, R7ArticleIE, ) -from .radiko import RadikoIE, RadikoRadioIE +from .radiko import ( + RadikoIE, + RadikoRadioIE, +) from .radiocanada import ( - RadioCanadaIE, RadioCanadaAudioVideoIE, + RadioCanadaIE, ) from .radiocomercial import ( RadioComercialIE, RadioComercialPlaylistIE, ) from .radiode import RadioDeIE -from .radiojavan import RadioJavanIE from .radiofrance import ( FranceCultureIE, RadioFranceIE, @@ -1565,35 +1627,36 @@ RadioFranceProfileIE, RadioFranceProgramScheduleIE, ) -from .radiozet import RadioZetPodcastIE +from .radiojavan import RadioJavanIE from .radiokapital import ( RadioKapitalIE, RadioKapitalShowIE, ) +from .radiozet import RadioZetPodcastIE from .radlive import ( - RadLiveIE, RadLiveChannelIE, + RadLiveIE, RadLiveSeasonIE, ) from .rai import ( - RaiIE, RaiCulturaIE, + RaiIE, + RaiNewsIE, RaiPlayIE, RaiPlayLiveIE, RaiPlayPlaylistIE, RaiPlaySoundIE, RaiPlaySoundLiveIE, RaiPlaySoundPlaylistIE, - RaiNewsIE, RaiSudtirolIE, ) from .raywenderlich import ( - RayWenderlichIE, RayWenderlichCourseIE, + RayWenderlichIE, ) from .rbgtum import ( - RbgTumIE, RbgTumCourseIE, + RbgTumIE, RbgTumNewCourseIE, ) from .rcs import ( @@ -1607,12 +1670,15 @@ RCTIPlusTVIE, ) from .rds import RDSIE -from .redbee import ParliamentLiveUKIE, RTBFIE +from .redbee import ( + RTBFIE, + ParliamentLiveUKIE, +) from .redbulltv import ( - RedBullTVIE, RedBullEmbedIE, - RedBullTVRrnContentIE, RedBullIE, + RedBullTVIE, + 
RedBullTVRrnContentIE, ) from .reddit import RedditIE from .redge import RedCDNLivxIE @@ -1632,107 +1698,100 @@ from .rheinmaintv import RheinMainTVIE from .ridehome import RideHomeIE from .rinsefm import ( - RinseFMIE, RinseFMArtistPlaylistIE, + RinseFMIE, ) from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE from .rokfin import ( - RokfinIE, - RokfinStackIE, RokfinChannelIE, + RokfinIE, RokfinSearchIE, + RokfinStackIE, +) +from .roosterteeth import ( + RoosterTeethIE, + RoosterTeethSeriesIE, ) -from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE from .rozhlas import ( + MujRozhlasIE, RozhlasIE, RozhlasVltavaIE, - MujRozhlasIE, ) -from .rte import RteIE, RteRadioIE +from .rte import ( + RteIE, + RteRadioIE, +) +from .rtl2 import RTL2IE from .rtlnl import ( - RtlNlIE, - RTLLuTeleVODIE, RTLLuArticleIE, RTLLuLiveIE, RTLLuRadioIE, + RTLLuTeleVODIE, + RtlNlIE, ) -from .rtl2 import RTL2IE from .rtnews import ( - RTNewsIE, RTDocumentryIE, RTDocumentryPlaylistIE, + RTNewsIE, RuptlyIE, ) from .rtp import RTPIE from .rtrfm import RTRFMIE from .rts import RTSIE from .rtvcplay import ( - RTVCPlayIE, - RTVCPlayEmbedIE, RTVCKalturaIE, + RTVCPlayEmbedIE, + RTVCPlayIE, ) from .rtve import ( RTVEALaCartaIE, RTVEAudioIE, - RTVELiveIE, RTVEInfantilIE, + RTVELiveIE, RTVETelevisionIE, ) from .rtvs import RTVSIE from .rtvslo import RTVSLOIE +from .rudovideo import RudoVideoIE from .rule34video import Rule34VideoIE from .rumble import ( - RumbleEmbedIE, - RumbleIE, RumbleChannelIE, -) -from .rudovideo import RudoVideoIE -from .rutube import ( - RutubeIE, - RutubeChannelIE, - RutubeEmbedIE, - RutubeMovieIE, - RutubePersonIE, - RutubePlaylistIE, - RutubeTagsIE, -) -from .glomex import ( - GlomexIE, - GlomexEmbedIE, -) -from .megatvcom import ( - MegaTVComIE, - MegaTVComEmbedIE, -) -from .antenna import ( - AntennaGrWatchIE, - Ant1NewsGrArticleIE, - Ant1NewsGrEmbedIE, + RumbleEmbedIE, + RumbleIE, +) 
+from .rutube import ( + RutubeChannelIE, + RutubeEmbedIE, + RutubeIE, + RutubeMovieIE, + RutubePersonIE, + RutubePlaylistIE, + RutubeTagsIE, ) from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import ( RuvIE, - RuvSpilaIE + RuvSpilaIE, ) from .s4c import ( S4CIE, - S4CSeriesIE + S4CSeriesIE, ) from .safari import ( - SafariIE, SafariApiIE, SafariCourseIE, + SafariIE, ) from .saitosan import SaitosanIE from .samplefocus import SampleFocusIE from .sapo import SapoIE from .sbs import SBSIE from .sbscokr import ( - SBSCoKrIE, SBSCoKrAllvodProgramIE, + SBSCoKrIE, SBSCoKrProgramsVodIE, ) from .screen9 import Screen9IE @@ -1740,24 +1799,27 @@ from .screencastify import ScreencastifyIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ( - ScrippsNetworksWatchIE, ScrippsNetworksIE, + ScrippsNetworksWatchIE, ) +from .scrolller import ScrolllerIE from .scte import ( SCTEIE, SCTECourseIE, ) -from .scrolller import ScrolllerIE from .sejmpl import SejmIE from .senalcolombia import SenalColombiaLiveIE -from .senategov import SenateISVPIE, SenateGovIE +from .senategov import ( + SenateGovIE, + SenateISVPIE, +) from .sendtonews import SendtoNewsIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE from .seznamzpravy import ( - SeznamZpravyIE, SeznamZpravyArticleIE, + SeznamZpravyIE, ) from .shahid import ( ShahidIE, @@ -1765,38 +1827,38 @@ ) from .sharepoint import SharePointIE from .sharevideos import ShareVideosEmbedIE -from .sibnet import SibnetEmbedIE from .shemaroome import ShemarooMeIE from .showroomlive import ShowRoomLiveIE +from .sibnet import SibnetEmbedIE from .simplecast import ( - SimplecastIE, SimplecastEpisodeIE, + SimplecastIE, SimplecastPodcastIE, ) from .sina import SinaIE from .sixplay import SixPlayIE from .skeb import SkebIE +from .sky import ( + SkyNewsIE, + SkyNewsStoryIE, + SkySportsIE, + SkySportsNewsIE, +) from .skyit import ( + CieloTVItIE, + SkyItArteIE, + SkyItIE, 
SkyItPlayerIE, SkyItVideoIE, SkyItVideoLiveIE, - SkyItIE, - SkyItArteIE, - CieloTVItIE, TV8ItIE, ) from .skylinewebcams import SkylineWebcamsIE from .skynewsarabia import ( - SkyNewsArabiaIE, SkyNewsArabiaArticleIE, + SkyNewsArabiaIE, ) from .skynewsau import SkyNewsAUIE -from .sky import ( - SkyNewsIE, - SkyNewsStoryIE, - SkySportsIE, - SkySportsNewsIE, -) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE @@ -1813,29 +1875,29 @@ from .soundcloud import ( SoundcloudEmbedIE, SoundcloudIE, - SoundcloudSetIE, + SoundcloudPlaylistIE, SoundcloudRelatedIE, + SoundcloudSearchIE, + SoundcloudSetIE, + SoundcloudTrackStationIE, SoundcloudUserIE, SoundcloudUserPermalinkIE, - SoundcloudTrackStationIE, - SoundcloudPlaylistIE, - SoundcloudSearchIE, ) from .soundgasm import ( SoundgasmIE, - SoundgasmProfileIE + SoundgasmProfileIE, ) from .southpark import ( - SouthParkIE, SouthParkDeIE, SouthParkDkIE, SouthParkEsIE, + SouthParkIE, SouthParkLatIE, - SouthParkNlIE + SouthParkNlIE, ) from .sovietscloset import ( SovietsClosetIE, - SovietsClosetPlaylistIE + SovietsClosetPlaylistIE, ) from .spankbang import ( SpankBangIE, @@ -1846,12 +1908,6 @@ BellatorIE, ParamountNetworkIE, ) -from .stageplus import StagePlusVODConcertIE -from .startrek import StarTrekIE -from .stitcher import ( - StitcherIE, - StitcherShowIE, -) from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE @@ -1875,19 +1931,25 @@ from .stacommu import ( StacommuLiveIE, StacommuVODIE, - TheaterComplexTownVODIE, TheaterComplexTownPPVIE, + TheaterComplexTownVODIE, ) +from .stageplus import StagePlusVODConcertIE from .stanfordoc import StanfordOpenClassroomIE +from .startrek import StarTrekIE from .startv import StarTVIE from .steam import ( - SteamIE, SteamCommunityBroadcastIE, + SteamIE, +) +from .stitcher import ( + StitcherIE, + StitcherShowIE, ) from .storyfire import ( StoryFireIE, - StoryFireUserIE, 
StoryFireSeriesIE, + StoryFireUserIE, ) from .streamable import StreamableIE from .streamcz import StreamCZIE @@ -1908,26 +1970,26 @@ SVTSeriesIE, ) from .swearnet import SwearnetEpisodeIE -from .syvdk import SYVDKIE from .syfy import SyfyIE +from .syvdk import SYVDKIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .taptap import ( - TapTapMomentIE, TapTapAppIE, TapTapAppIntlIE, + TapTapMomentIE, TapTapPostIntlIE, ) from .tass import TassIE from .tbs import TBSIE from .tbsjp import ( TBSJPEpisodeIE, - TBSJPProgramIE, TBSJPPlaylistIE, + TBSJPProgramIE, ) from .teachable import ( - TeachableIE, TeachableCourseIE, + TeachableIE, ) from .teachertube import ( TeacherTubeIE, @@ -1935,8 +1997,8 @@ ) from .teachingchannel import TeachingChannelIE from .teamcoco import ( - TeamcocoIE, ConanClassicIE, + TeamcocoIE, ) from .teamtreehouse import TeamTreeHouseIE from .ted import ( @@ -1955,15 +2017,18 @@ from .telemb import TeleMBIE from .telemundo import TelemundoIE from .telequebec import ( - TeleQuebecIE, - TeleQuebecSquatIE, TeleQuebecEmissionIE, + TeleQuebecIE, TeleQuebecLiveIE, + TeleQuebecSquatIE, TeleQuebecVideoIE, ) from .teletask import TeleTaskIE from .telewebion import TelewebionIE -from .tempo import TempoIE, IVXPlayerIE +from .tempo import ( + IVXPlayerIE, + TempoIE, +) from .tencent import ( IflixEpisodeIE, IflixSeriesIE, @@ -1987,8 +2052,8 @@ from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( - ThePlatformIE, ThePlatformFeedIE, + ThePlatformIE, ) from .thestar import TheStarIE from .thesun import TheSunIE @@ -2000,50 +2065,51 @@ ThisVidMemberIE, ThisVidPlaylistIE, ) +from .threeqsdn import ThreeQSDNIE from .threespeak import ( ThreeSpeakIE, ThreeSpeakUserIE, ) -from .threeqsdn import ThreeQSDNIE from .tiktok import ( + DouyinIE, + TikTokEffectIE, TikTokIE, - TikTokUserIE, + TikTokLiveIE, TikTokSoundIE, - TikTokEffectIE, TikTokTagIE, + TikTokUserIE, TikTokVMIE, - TikTokLiveIE, - DouyinIE, 
) from .tmz import TMZIE from .tnaflix import ( - TNAFlixNetworkEmbedIE, - TNAFlixIE, EMPFlixIE, MovieFapIE, + TNAFlixIE, + TNAFlixNetworkEmbedIE, ) from .toggle import ( - ToggleIE, MeWatchIE, + ToggleIE, ) -from .toggo import ( - ToggoIE, -) +from .toggo import ToggoIE from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE from .toutv import TouTvIE -from .toypics import ToypicsUserIE, ToypicsIE +from .toypics import ( + ToypicsIE, + ToypicsUserIE, +) from .traileraddict import TrailerAddictIE from .triller import ( TrillerIE, - TrillerUserIE, TrillerShortIE, + TrillerUserIE, ) from .trovo import ( + TrovoChannelClipIE, + TrovoChannelVodIE, TrovoIE, TrovoVodIE, - TrovoChannelVodIE, - TrovoChannelClipIE, ) from .trtcocuk import TrtCocukVideoIE from .trtworld import TrtWorldIE @@ -2052,26 +2118,26 @@ from .truth import TruthIE from .trutv import TruTVIE from .tube8 import Tube8IE -from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE +from .tubetugraz import ( + TubeTuGrazIE, + TubeTuGrazSeriesIE, +) from .tubitv import ( TubiTvIE, TubiTvShowIE, ) from .tumblr import TumblrIE from .tunein import ( - TuneInStationIE, - TuneInPodcastIE, TuneInPodcastEpisodeIE, + TuneInPodcastIE, TuneInShortenerIE, + TuneInStationIE, ) from .tv2 import ( TV2IE, - TV2ArticleIE, KatsomoIE, MTVUutisetArticleIE, -) -from .tv24ua import ( - TV24UAVideoIE, + TV2ArticleIE, ) from .tv2dk import ( TV2DKIE, @@ -2084,16 +2150,17 @@ from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tv5unis import ( - TV5UnisVideoIE, TV5UnisIE, + TV5UnisVideoIE, ) +from .tv24ua import TV24UAVideoIE from .tva import ( TVAIE, QubIE, ) from .tvanouvelles import ( - TVANouvellesIE, TVANouvellesArticleIE, + TVANouvellesIE, ) from .tvc import ( TVCIE, @@ -2106,19 +2173,19 @@ from .tvn24 import TVN24IE from .tvnoe import TVNoeIE from .tvopengr import ( - TVOpenGrWatchIE, TVOpenGrEmbedIE, + TVOpenGrWatchIE, ) from .tvp import ( - TVPEmbedIE, TVPIE, + TVPEmbedIE, TVPStreamIE, 
TVPVODSeriesIE, TVPVODVideoIE, ) from .tvplay import ( - TVPlayIE, TVPlayHomeIE, + TVPlayIE, ) from .tvplayer import TVPlayerIE from .tweakers import TweakersIE @@ -2130,29 +2197,29 @@ TwitCastingUserIE, ) from .twitch import ( - TwitchVodIE, + TwitchClipsIE, TwitchCollectionIE, - TwitchVideosIE, + TwitchStreamIE, TwitchVideosClipsIE, TwitchVideosCollectionsIE, - TwitchStreamIE, - TwitchClipsIE, + TwitchVideosIE, + TwitchVodIE, ) from .twitter import ( - TwitterCardIE, - TwitterIE, TwitterAmplifyIE, TwitterBroadcastIE, - TwitterSpacesIE, + TwitterCardIE, + TwitterIE, TwitterShortenerIE, + TwitterSpacesIE, ) from .txxx import ( - TxxxIE, PornTopIE, + TxxxIE, ) from .udemy import ( + UdemyCourseIE, UdemyIE, - UdemyCourseIE ) from .udn import UDNEmbedIE from .ufctv import ( @@ -2161,16 +2228,13 @@ ) from .ukcolumn import UkColumnIE from .uktvplay import UKTVPlayIE -from .digiteka import DigitekaIE -from .dlive import ( - DLiveVODIE, - DLiveStreamIE, -) -from .drooble import DroobleIE from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE -from .unsupported import KnownDRMIE, KnownPiracyIE +from .unsupported import ( + KnownDRMIE, + KnownPiracyIE, +) from .uol import UOLIE from .uplynk import ( UplynkIE, @@ -2180,10 +2244,13 @@ from .urplay import URPlayIE from .usanetwork import USANetworkIE from .usatoday import USATodayIE -from .ustream import UstreamIE, UstreamChannelIE +from .ustream import ( + UstreamChannelIE, + UstreamIE, +) from .ustudio import ( - UstudioIE, UstudioEmbedIE, + UstudioIE, ) from .utreon import UtreonIE from .varzesh3 import Varzesh3IE @@ -2191,7 +2258,7 @@ from .veo import VeoIE from .veoh import ( VeohIE, - VeohUserIE + VeohUserIE, ) from .vesti import VestiIE from .vevo import ( @@ -2199,14 +2266,14 @@ VevoPlaylistIE, ) from .vgtv import ( + VGTVIE, BTArticleIE, BTVestlendingenIE, - VGTVIE, ) from .vh1 import VH1IE from .vice import ( - ViceIE, ViceArticleIE, + ViceIE, ViceShowIE, ) from .viddler import ViddlerIE 
@@ -2218,42 +2285,46 @@ from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videoken import ( + VideoKenCategoryIE, VideoKenIE, VideoKenPlayerIE, VideoKenPlaylistIE, - VideoKenCategoryIE, VideoKenTopicIE, ) from .videomore import ( VideomoreIE, - VideomoreVideoIE, VideomoreSeasonIE, + VideomoreVideoIE, ) from .videopress import VideoPressIE from .vidio import ( VidioIE, + VidioLiveIE, VidioPremierIE, - VidioLiveIE ) from .vidlii import VidLiiIE from .vidly import VidlyIE from .viewlift import ( - ViewLiftIE, ViewLiftEmbedIE, + ViewLiftIE, ) from .viidea import ViideaIE +from .viki import ( + VikiChannelIE, + VikiIE, +) from .vimeo import ( - VimeoIE, + VHXEmbedIE, VimeoAlbumIE, VimeoChannelIE, VimeoGroupsIE, + VimeoIE, VimeoLikesIE, VimeoOndemandIE, VimeoProIE, VimeoReviewIE, VimeoUserIE, VimeoWatchLaterIE, - VHXEmbedIE, ) from .vimm import ( VimmIE, @@ -2263,46 +2334,41 @@ VineIE, VineUserIE, ) -from .viki import ( - VikiIE, - VikiChannelIE, -) from .viously import ViouslyIE from .viqeo import ViqeoIE from .viu import ( ViuIE, - ViuPlaylistIE, ViuOTTIE, ViuOTTIndonesiaIE, + ViuPlaylistIE, ) from .vk import ( VKIE, - VKUserVideosIE, - VKWallPostIE, VKPlayIE, VKPlayLiveIE, + VKUserVideosIE, + VKWallPostIE, ) from .vocaroo import VocarooIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicy import ( - VoicyIE, VoicyChannelIE, + VoicyIE, ) from .volejtv import VolejTVIE from .voxmedia import ( - VoxMediaVolumeIE, VoxMediaIE, + VoxMediaVolumeIE, ) from .vrt import ( VRTIE, - VrtNUIE, - KetnetIE, DagelijkseKostIE, + KetnetIE, Radio1BeIE, + VrtNUIE, ) from .vtm import VTMIE -from .medialaan import MedialaanIE from .vuclip import VuClipIE from .vvvvid import ( VVVVIDIE, @@ -2310,20 +2376,20 @@ ) from .walla import WallaIE from .washingtonpost import ( - WashingtonPostIE, WashingtonPostArticleIE, + WashingtonPostIE, ) from .wat import WatIE from .wdr import ( WDRIE, - WDRPageIE, WDRElefantIE, WDRMobileIE, + 
WDRPageIE, ) from .webcamerapl import WebcameraplIE from .webcaster import ( - WebcasterIE, WebcasterFeedIE, + WebcasterIE, ) from .webofstories import ( WebOfStoriesIE, @@ -2331,42 +2397,42 @@ ) from .weibo import ( WeiboIE, - WeiboVideoIE, WeiboUserIE, + WeiboVideoIE, ) from .weiqitv import WeiqiTVIE from .weverse import ( WeverseIE, - WeverseMediaIE, - WeverseMomentIE, + WeverseLiveIE, WeverseLiveTabIE, + WeverseMediaIE, WeverseMediaTabIE, - WeverseLiveIE, + WeverseMomentIE, ) from .wevidi import WeVidiIE from .weyyak import WeyyakIE +from .whowatch import WhoWatchIE from .whyp import WhypIE from .wikimedia import WikimediaIE from .wimbledon import WimbledonIE from .wimtv import WimTVIE -from .whowatch import WhoWatchIE from .wistia import ( + WistiaChannelIE, WistiaIE, WistiaPlaylistIE, - WistiaChannelIE, ) from .wordpress import ( - WordpressPlaylistEmbedIE, WordpressMiniAudioPlayerEmbedIE, + WordpressPlaylistEmbedIE, ) from .worldstarhiphop import WorldStarHipHopIE from .wppilot import ( - WPPilotIE, WPPilotChannelsIE, + WPPilotIE, ) from .wrestleuniverse import ( - WrestleUniverseVODIE, WrestleUniversePPVIE, + WrestleUniverseVODIE, ) from .wsj import ( WSJIE, @@ -2374,22 +2440,22 @@ ) from .wwe import WWEIE from .wykop import ( - WykopDigIE, WykopDigCommentIE, - WykopPostIE, + WykopDigIE, WykopPostCommentIE, + WykopPostIE, ) from .xanimu import XanimuIE from .xboxclips import XboxClipsIE from .xhamster import ( - XHamsterIE, XHamsterEmbedIE, + XHamsterIE, XHamsterUserIE, ) from .xiaohongshu import XiaoHongShuIE from .ximalaya import ( + XimalayaAlbumIE, XimalayaIE, - XimalayaAlbumIE ) from .xinpianchang import XinpianchangIE from .xminus import XMinusIE @@ -2397,27 +2463,27 @@ from .xstream import XstreamIE from .xvideos import ( XVideosIE, - XVideosQuickiesIE + XVideosQuickiesIE, ) from .xxxymovies import XXXYMoviesIE from .yahoo import ( YahooIE, - YahooSearchIE, YahooJapanNewsIE, + YahooSearchIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic 
import ( - YandexMusicTrackIE, YandexMusicAlbumIE, - YandexMusicPlaylistIE, - YandexMusicArtistTracksIE, YandexMusicArtistAlbumsIE, + YandexMusicArtistTracksIE, + YandexMusicPlaylistIE, + YandexMusicTrackIE, ) from .yandexvideo import ( YandexVideoIE, YandexVideoPreviewIE, - ZenYandexIE, ZenYandexChannelIE, + ZenYandexIE, ) from .yapfiles import YapFilesIE from .yappy import ( @@ -2431,24 +2497,26 @@ YoukuShowIE, ) from .younow import ( - YouNowLiveIE, YouNowChannelIE, + YouNowLiveIE, YouNowMomentIE, ) from .youporn import YouPornIE from .zaiko import ( - ZaikoIE, ZaikoETicketIE, + ZaikoIE, ) from .zapiks import ZapiksIE from .zattoo import ( BBVTVIE, + EWETVIE, + SAKTVIE, + VTXTVIE, BBVTVLiveIE, BBVTVRecordingsIE, EinsUndEinsTVIE, EinsUndEinsTVLiveIE, EinsUndEinsTVRecordingsIE, - EWETVIE, EWETVLiveIE, EWETVRecordingsIE, GlattvisionTVIE, @@ -2466,13 +2534,11 @@ QuantumTVIE, QuantumTVLiveIE, QuantumTVRecordingsIE, + SAKTVLiveIE, + SAKTVRecordingsIE, SaltTVIE, SaltTVLiveIE, SaltTVRecordingsIE, - SAKTVIE, - SAKTVLiveIE, - SAKTVRecordingsIE, - VTXTVIE, VTXTVLiveIE, VTXTVRecordingsIE, WalyTVIE, @@ -2483,7 +2549,10 @@ ZattooMoviesIE, ZattooRecordingsIE, ) -from .zdf import ZDFIE, ZDFChannelIE +from .zdf import ( + ZDFIE, + ZDFChannelIE, +) from .zee5 import ( Zee5IE, Zee5SeriesIE, @@ -2493,16 +2562,16 @@ from .zetland import ZetlandDKArticleIE from .zhihu import ZhihuIE from .zingmp3 import ( - ZingMp3IE, ZingMp3AlbumIE, ZingMp3ChartHomeIE, - ZingMp3WeekChartIE, ZingMp3ChartMusicVideoIE, - ZingMp3UserIE, ZingMp3HubIE, + ZingMp3IE, ZingMp3LiveRadioIE, ZingMp3PodcastEpisodeIE, ZingMp3PodcastIE, + ZingMp3UserIE, + ZingMp3WeekChartIE, ) from .zoom import ZoomIE from .zype import ZypeIE diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index b21742281808..2c0d296fd2d7 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -6,10 +6,10 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - dict_get, 
ExtractorError, - js_to_json, + dict_get, int_or_none, + js_to_json, parse_iso8601, str_or_none, traverse_obj, diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index fee7375eac68..b8c79b912a93 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -12,20 +12,21 @@ import urllib.request import urllib.response import uuid -from ..utils.networking import clean_proxies + from .common import InfoExtractor from ..aes import aes_ecb_decrypt from ..utils import ( ExtractorError, + OnDemandPagedList, bytes_to_intlist, decode_base_n, int_or_none, intlist_to_bytes, - OnDemandPagedList, time_seconds, traverse_obj, update_url_query, ) +from ..utils.networking import clean_proxies def add_opener(ydl, handler): # FIXME: Create proper API in .networking diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py index c3b4f432ee6b..07933192f3bd 100644 --- a/yt_dlp/extractor/acfun.py +++ b/yt_dlp/extractor/acfun.py @@ -3,10 +3,10 @@ float_or_none, format_field, int_or_none, - str_or_none, - traverse_obj, parse_codecs, parse_qs, + str_or_none, + traverse_obj, ) diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 898d37298090..2f3b67dad4cb 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -10,18 +10,18 @@ from ..compat import compat_b64decode from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, ass_subtitles_timecode, bytes_to_intlist, bytes_to_long, - ExtractorError, float_or_none, int_or_none, intlist_to_bytes, long_to_bytes, parse_iso8601, pkcs1pad, - strip_or_none, str_or_none, + strip_or_none, try_get, unified_strdate, urlencode_postdata, diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index d1525a1af2d5..08e9e51823c6 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -4,11 +4,11 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ISO639Utils, + OnDemandPagedList, 
float_or_none, int_or_none, - ISO639Utils, join_nonempty, - OnDemandPagedList, parse_duration, str_or_none, str_to_int, diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py index 0b73a966edcc..6cc63cd7f989 100644 --- a/yt_dlp/extractor/airtv.py +++ b/yt_dlp/extractor/airtv.py @@ -5,7 +5,7 @@ int_or_none, mimetype2ext, parse_iso8601, - traverse_obj + traverse_obj, ) diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py index 87219f2f8d39..49df4bf3aa14 100644 --- a/yt_dlp/extractor/allstar.py +++ b/yt_dlp/extractor/allstar.py @@ -12,7 +12,6 @@ ) from ..utils.traversal import traverse_obj - _FIELDS = ''' _id clipImageSource diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index 8d5b472d3239..f927965de9ee 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..utils import ( - parse_iso8601, + int_or_none, parse_duration, parse_filesize, - int_or_none, + parse_iso8601, ) diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index b785c62c3210..cb2b9891e9b7 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,17 +1,13 @@ import re from .common import InfoExtractor - -from ..compat import ( - compat_urlparse, -) - +from ..compat import compat_urlparse from ..utils import ( + ExtractorError, + clean_html, + int_or_none, urlencode_postdata, urljoin, - int_or_none, - clean_html, - ExtractorError ) diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 5018710e0346..509b21a5316c 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -1,6 +1,6 @@ from .common import InfoExtractor -from .youtube import YoutubeIE from .vimeo import VimeoIE +from .youtube import YoutubeIE from ..utils import ( int_or_none, parse_iso8601, diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 0d259c549fa3..6b2bf2db2c56 100644 --- a/yt_dlp/extractor/amp.py +++ 
b/yt_dlp/extractor/amp.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, mimetype2ext, parse_iso8601, diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py index 52f2ad057f74..5e78f372e46c 100644 --- a/yt_dlp/extractor/anchorfm.py +++ b/yt_dlp/extractor/anchorfm.py @@ -5,7 +5,7 @@ int_or_none, str_or_none, traverse_obj, - unified_timestamp + unified_timestamp, ) diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py index 306b3651e334..9f5b9b523e36 100644 --- a/yt_dlp/extractor/angel.py +++ b/yt_dlp/extractor/angel.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from ..utils import url_or_none, merge_dicts +from ..utils import merge_dicts, url_or_none class AngelIE(InfoExtractor): diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py index d00b0f9060ec..433eb4ed8e89 100644 --- a/yt_dlp/extractor/appleconnect.py +++ b/yt_dlp/extractor/appleconnect.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - str_to_int, - ExtractorError -) +from ..utils import ExtractorError, str_to_int class AppleConnectIE(InfoExtractor): diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 2e0b0a8c932b..21103aee5719 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -1,5 +1,5 @@ -import re import json +import re from .common import InfoExtractor from ..compat import compat_urlparse diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index a493714d1fb2..9a5524aabed8 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -4,8 +4,8 @@ compat_urllib_parse_urlparse, ) from ..utils import ( - format_field, float_or_none, + format_field, int_or_none, parse_iso8601, remove_start, diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index d60feba3159a..20ee34cca76a 100644 --- 
a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -2,10 +2,10 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, float_or_none, jwt_encode_hs256, try_get, - ExtractorError, ) diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index 6fc938de9c44..a8dfb3efcc38 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_str, + compat_urllib_parse_urlencode, ) from ..utils import ( format_field, diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index 67af29a962e4..c4e07a79a844 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -2,12 +2,12 @@ from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlparse, compat_parse_qs, + compat_urllib_parse_urlparse, ) from ..utils import ( - format_field, InAdvancePagedList, + format_field, traverse_obj, unified_timestamp, ) diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 51e7220578bc..82dc9ab025e7 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -2,11 +2,11 @@ from .common import InfoExtractor from ..utils import ( - try_get, - int_or_none, - url_or_none, float_or_none, + int_or_none, + try_get, unified_timestamp, + url_or_none, ) diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index 042b3220b749..da98ac314047 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( int_or_none, str_or_none, diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index e875957cf5b7..aa3d63ee7b34 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .amp import AMPIE +from .common import InfoExtractor from ..utils 
import ( ExtractorError, int_or_none, diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index 3d6e0330429d..ef0151de672f 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -1,3 +1,4 @@ +from .common import InfoExtractor from ..utils import ( mimetype2ext, parse_duration, @@ -5,7 +6,6 @@ str_or_none, traverse_obj, ) -from .common import InfoExtractor class BloggerIE(InfoExtractor): diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 92f8ea2cb47c..267586687258 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import ( extract_attributes, ) diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py index 51f9eb787348..da06cc3f860f 100644 --- a/yt_dlp/extractor/boxcast.py +++ b/yt_dlp/extractor/boxcast.py @@ -1,9 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - js_to_json, - traverse_obj, - unified_timestamp -) +from ..utils import js_to_json, traverse_obj, unified_timestamp class BoxCastVideoIE(InfoExtractor): diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py index 1200437e639c..04b1dd80c838 100644 --- a/yt_dlp/extractor/brainpop.py +++ b/yt_dlp/extractor/brainpop.py @@ -6,7 +6,7 @@ classproperty, int_or_none, traverse_obj, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 61b18412d442..4190e1a0992c 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -12,10 +12,11 @@ ) from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + UnsupportedError, clean_html, dict_get, extract_attributes, - ExtractorError, find_xpath_attr, fix_xml_ampersands, float_or_none, @@ -29,7 +30,6 @@ try_get, unescapeHTML, unsmuggle_url, - UnsupportedError, update_url_query, url_or_none, ) diff --git a/yt_dlp/extractor/cbs.py 
b/yt_dlp/extractor/cbs.py index cf830210f86c..aca9782c76d4 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -5,14 +5,14 @@ from ..utils import ( ExtractorError, extract_attributes, + find_xpath_attr, get_element_html_by_id, int_or_none, - find_xpath_attr, smuggle_url, - xpath_element, - xpath_text, update_url_query, url_or_none, + xpath_element, + xpath_text, ) diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py index 9cffa11e81f0..745b71f24399 100644 --- a/yt_dlp/extractor/cinetecamilano.py +++ b/yt_dlp/extractor/cinetecamilano.py @@ -1,4 +1,5 @@ import json + from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py index 006a713b2a57..67b56e00d9ab 100644 --- a/yt_dlp/extractor/clippit.py +++ b/yt_dlp/extractor/clippit.py @@ -1,11 +1,11 @@ +import re + from .common import InfoExtractor from ..utils import ( parse_iso8601, qualities, ) -import re - class ClippitIE(InfoExtractor): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a952828fba1d..a33cef354e08 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,5 +1,6 @@ import base64 import collections +import functools import getpass import hashlib import http.client @@ -21,7 +22,6 @@ import urllib.request import xml.etree.ElementTree -from ..compat import functools # isort: split from ..compat import ( compat_etree_fromstring, compat_expanduser, diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py index bcc34ddd8a12..0a98c980f13a 100644 --- a/yt_dlp/extractor/corus.py +++ b/yt_dlp/extractor/corus.py @@ -1,7 +1,7 @@ from .theplatform import ThePlatformFeedIE from ..utils import ( - dict_get, ExtractorError, + dict_get, float_or_none, int_or_none, ) diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py index 1ef90b5a07b8..0cb7d940cdb9 100644 --- 
a/yt_dlp/extractor/crackle.py +++ b/yt_dlp/extractor/crackle.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, determine_ext, float_or_none, int_or_none, @@ -13,7 +14,6 @@ parse_age_limit, parse_duration, url_or_none, - ExtractorError ) diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index 0075680e8f50..e56584e4e70e 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -1,10 +1,12 @@ import re from .common import InfoExtractor +from .senategov import SenateISVPIE +from .ustream import UstreamIE from ..compat import compat_HTMLParseError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, extract_attributes, find_xpath_attr, get_element_by_attribute, @@ -19,8 +21,6 @@ str_to_int, unescapeHTML, ) -from .senategov import SenateISVPIE -from .ustream import UstreamIE class CSpanIE(InfoExtractor): diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py index cec178f03409..1817bd2ff92e 100644 --- a/yt_dlp/extractor/ctsnews.py +++ b/yt_dlp/extractor/ctsnews.py @@ -1,6 +1,6 @@ from .common import InfoExtractor -from ..utils import unified_timestamp from .youtube import YoutubeIE +from ..utils import unified_timestamp class CtsNewsIE(InfoExtractor): diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py index 43401e111568..4c25bea11c37 100644 --- a/yt_dlp/extractor/dailymail.py +++ b/yt_dlp/extractor/dailymail.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - int_or_none, determine_protocol, + int_or_none, try_get, unescapeHTML, ) diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py index 5e14d6aff029..2e0f6f0d34b9 100644 --- a/yt_dlp/extractor/damtomo.py +++ b/yt_dlp/extractor/damtomo.py @@ -1,8 +1,8 @@ import re from .common import InfoExtractor -from ..utils import ExtractorError, clean_html, int_or_none, try_get, 
unified_strdate from ..compat import compat_str +from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate class DamtomoBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py index 1624d085c1fe..177424937804 100644 --- a/yt_dlp/extractor/democracynow.py +++ b/yt_dlp/extractor/democracynow.py @@ -1,11 +1,11 @@ -import re import os.path +import re from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - url_basename, remove_start, + url_basename, ) diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index c11cd790b005..4380c414ee04 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( ExtractorError, parse_resolution, diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py index 1f3d8e31c53a..b2663a63dd2c 100644 --- a/yt_dlp/extractor/discoverygo.py +++ b/yt_dlp/extractor/discoverygo.py @@ -2,9 +2,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, extract_attributes, - ExtractorError, int_or_none, parse_age_limit, remove_end, diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py index 430de326f40a..d8dde0ca71bf 100644 --- a/yt_dlp/extractor/disney.py +++ b/yt_dlp/extractor/disney.py @@ -2,10 +2,10 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, - unified_strdate, determine_ext, + int_or_none, join_nonempty, + unified_strdate, update_url_query, ) diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index ee8893d5af8c..244ffdf1cd18 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -1,5 +1,5 @@ -import time import hashlib +import time import urllib import uuid diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index 
1ecc4baf6799..ddf2128b0a43 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, float_or_none, int_or_none, remove_start, diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index e5dab6ac0d5d..a9247edc0f1c 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, NO_DEFAULT, + int_or_none, parse_duration, str_to_int, ) diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py index 626e577e7eff..adc7705bc209 100644 --- a/yt_dlp/extractor/duboku.py +++ b/yt_dlp/extractor/duboku.py @@ -5,9 +5,9 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( + ExtractorError, clean_html, extract_attributes, - ExtractorError, get_elements_by_class, int_or_none, js_to_json, diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py index e67143370ba4..e6660dcd9924 100644 --- a/yt_dlp/extractor/dvtv.py +++ b/yt_dlp/extractor/dvtv.py @@ -2,15 +2,15 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, join_nonempty, js_to_json, mimetype2ext, + parse_iso8601, try_get, unescapeHTML, - parse_iso8601, ) diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py index f7b852076608..feab804af91a 100644 --- a/yt_dlp/extractor/dw.py +++ b/yt_dlp/extractor/dw.py @@ -1,10 +1,10 @@ from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( int_or_none, unified_strdate, url_or_none, ) -from ..compat import compat_urlparse class DWIE(InfoExtractor): diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py index 9ecdf5d3b7c9..19c6933e7fdc 100644 --- a/yt_dlp/extractor/ertgr.py +++ b/yt_dlp/extractor/ertgr.py @@ -4,15 
+4,15 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, dict_get, int_or_none, merge_dicts, - parse_qs, parse_age_limit, parse_iso8601, + parse_qs, str_or_none, try_get, url_or_none, diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py index 29dfc8ae9585..0cf889a1ecdc 100644 --- a/yt_dlp/extractor/europa.py +++ b/yt_dlp/extractor/europa.py @@ -8,7 +8,7 @@ qualities, traverse_obj, unified_strdate, - xpath_text + xpath_text, ) diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py index 65a1dc7c503e..66fa42fa1c91 100644 --- a/yt_dlp/extractor/euscreen.py +++ b/yt_dlp/extractor/euscreen.py @@ -1,8 +1,7 @@ from .common import InfoExtractor - from ..utils import ( - parse_duration, js_to_json, + parse_duration, ) diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py index d8b068e9c5e8..4a13ab08d54e 100644 --- a/yt_dlp/extractor/eyedotv.py +++ b/yt_dlp/extractor/eyedotv.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..utils import ( - xpath_text, - parse_duration, ExtractorError, + parse_duration, + xpath_text, ) diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py index cddf254978b1..1e80f9a378a8 100644 --- a/yt_dlp/extractor/fancode.py +++ b/yt_dlp/extractor/fancode.py @@ -1,12 +1,6 @@ from .common import InfoExtractor - from ..compat import compat_str -from ..utils import ( - parse_iso8601, - ExtractorError, - try_get, - mimetype2ext -) +from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get class FancodeVodIE(InfoExtractor): diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py index bca62add9f73..796bac3c3173 100644 --- a/yt_dlp/extractor/faz.py +++ b/yt_dlp/extractor/faz.py @@ -3,9 +3,9 @@ from .common import InfoExtractor from ..compat import compat_etree_fromstring from ..utils import ( + int_or_none, xpath_element, xpath_text, - int_or_none, ) diff --git 
a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py index 8175b6b0f770..b2dbb92d5e21 100644 --- a/yt_dlp/extractor/fczenit.py +++ b/yt_dlp/extractor/fczenit.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, float_or_none, + int_or_none, ) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index f604cbd40de7..ae837f6a0203 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( int_or_none, traverse_obj, diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py index 0cd18f49479a..69ca87c842f1 100644 --- a/yt_dlp/extractor/filmon.py +++ b/yt_dlp/extractor/filmon.py @@ -2,10 +2,10 @@ from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + int_or_none, qualities, strip_or_none, - int_or_none, - ExtractorError, ) diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index f9d22fd3381c..c10d290dc34f 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -7,7 +7,7 @@ parse_codecs, parse_duration, str_to_int, - unified_timestamp + unified_timestamp, ) diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py index 1d3c0b110746..b284e1e28488 100644 --- a/yt_dlp/extractor/gamejolt.py +++ b/yt_dlp/extractor/gamejolt.py @@ -10,7 +10,7 @@ int_or_none, str_or_none, traverse_obj, - try_get + try_get, ) diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py index bc56b03e36c1..6403be8cf4d3 100644 --- a/yt_dlp/extractor/gaskrank.py +++ b/yt_dlp/extractor/gaskrank.py @@ -1,4 +1,5 @@ import re + from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2cfed0fd0a1d..2818c718d167 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -4,7 +4,7 @@ import urllib.parse import xml.etree.ElementTree 
-from .common import InfoExtractor # isort: split +from .common import InfoExtractor from .commonprotocols import RtmpIE from .youtube import YoutubeIE from ..compat import compat_etree_fromstring diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py index 7795dc56f752..b9dc7c63c52c 100644 --- a/yt_dlp/extractor/gettr.py +++ b/yt_dlp/extractor/gettr.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - bool_or_none, ExtractorError, + bool_or_none, dict_get, float_or_none, int_or_none, diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py index c5bc86bb4a4a..7baf8de8d61c 100644 --- a/yt_dlp/extractor/gigya.py +++ b/yt_dlp/extractor/gigya.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( ExtractorError, urlencode_postdata, diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index 22aac0db90aa..515f3c5671a5 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -3,9 +3,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, extract_attributes, - ExtractorError, int_or_none, parse_qs, smuggle_url, diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index b075a02e0fe1..fba98d79f501 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -3,16 +3,16 @@ from .adobepass import AdobePassIE from ..compat import compat_str from ..utils import ( - int_or_none, + ExtractorError, determine_ext, + int_or_none, parse_age_limit, - remove_start, remove_end, + remove_start, + traverse_obj, try_get, - urlencode_postdata, - ExtractorError, unified_timestamp, - traverse_obj, + urlencode_postdata, ) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index f010fff36083..276a6c7fe918 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -4,7 +4,7 @@ determine_ext, str_or_none, unified_timestamp, - url_or_none + url_or_none, ) from ..utils.traversal import 
traverse_obj diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index c6eca0c4dffc..fac088462139 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -1,10 +1,7 @@ import hashlib from .common import InfoExtractor -from ..utils import ( - ExtractorError, - try_get -) +from ..utils import ExtractorError, try_get class GofileIE(InfoExtractor): diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py index 112293bef56c..9c1a6cb91f25 100644 --- a/yt_dlp/extractor/gotostage.py +++ b/yt_dlp/extractor/gotostage.py @@ -1,11 +1,8 @@ +import json + from .common import InfoExtractor from ..compat import compat_str -from ..utils import ( - try_get, - url_or_none -) - -import json +from ..utils import try_get, url_or_none class GoToStageIE(InfoExtractor): diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py index 530bdb7270f8..2551cfffdbfe 100644 --- a/yt_dlp/extractor/hbo.py +++ b/yt_dlp/extractor/hbo.py @@ -2,11 +2,11 @@ from .common import InfoExtractor from ..utils import ( - xpath_text, - xpath_element, int_or_none, parse_duration, urljoin, + xpath_element, + xpath_text, ) diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py index c7da8f97dee0..eb0a77952ea6 100644 --- a/yt_dlp/extractor/hearthisat.py +++ b/yt_dlp/extractor/hearthisat.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, KNOWN_EXTENSIONS, + determine_ext, str_to_int, ) diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py index e026996da6fd..099c2a175c5b 100644 --- a/yt_dlp/extractor/hketv.py +++ b/yt_dlp/extractor/hketv.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, merge_dicts, parse_count, diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py index 57b76e46b444..41d50d000436 100644 --- a/yt_dlp/extractor/hrti.py +++ 
b/yt_dlp/extractor/hrti.py @@ -4,8 +4,8 @@ from ..networking import Request from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, parse_age_limit, try_get, diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index c4965f9bce85..5379b54100ea 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -2,8 +2,8 @@ import random import re -from ..compat import compat_urlparse, compat_b64decode - +from .common import InfoExtractor +from ..compat import compat_b64decode, compat_urlparse from ..utils import ( ExtractorError, int_or_none, @@ -13,8 +13,6 @@ update_url_query, ) -from .common import InfoExtractor - class HuyaLiveIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)' diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py index 9d55ddc02128..c28d09f34a8a 100644 --- a/yt_dlp/extractor/ichinanalive.py +++ b/yt_dlp/extractor/ichinanalive.py @@ -1,6 +1,6 @@ from .common import InfoExtractor -from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate from ..compat import compat_str +from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate class IchinanaLiveIE(InfoExtractor): diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py index 192bcfe35d21..2bb48508caba 100644 --- a/yt_dlp/extractor/infoq.py +++ b/yt_dlp/extractor/infoq.py @@ -1,3 +1,4 @@ +from .bokecc import BokeCCBaseIE from ..compat import ( compat_b64decode, compat_urllib_parse_unquote, @@ -6,10 +7,9 @@ from ..utils import ( ExtractorError, determine_ext, - update_url_query, traverse_obj, + update_url_query, ) -from .bokecc import BokeCCBaseIE class InfoQIE(BokeCCBaseIE): diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index f7aa579b38d8..d5a3d8095f3f 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -3,12 +3,12 @@ from .common import 
InfoExtractor from ..utils import ( + ExtractorError, determine_ext, js_to_json, - urlencode_postdata, - ExtractorError, parse_qs, - traverse_obj + traverse_obj, + urlencode_postdata, ) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 3368ab1d93af..85ed549deb23 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -4,20 +4,16 @@ import time from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, - compat_urllib_parse_unquote -) from .openload import PhantomJSwrapper +from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_urlencode from ..utils import ( + ExtractorError, clean_html, decode_packed_codes, - ExtractorError, float_or_none, format_field, - get_element_by_id, get_element_by_attribute, + get_element_by_id, int_or_none, js_to_json, ohdave_rsa_encrypt, diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py index 713fd4ec5bd3..5d6fbaa01746 100644 --- a/yt_dlp/extractor/itprotv.py +++ b/yt_dlp/extractor/itprotv.py @@ -1,12 +1,11 @@ import re from .common import InfoExtractor - from ..utils import ( int_or_none, str_or_none, traverse_obj, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 9ac7be3074b7..55c4165215c1 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -1,23 +1,22 @@ import json -from .common import InfoExtractor from .brightcove import BrightcoveNewIE - +from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + JSON_LD_RE, + ExtractorError, base_url, clean_html, determine_ext, extract_attributes, - ExtractorError, get_element_by_class, - JSON_LD_RE, merge_dicts, parse_duration, smuggle_url, try_get, - url_or_none, url_basename, + url_or_none, urljoin, ) diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py index e23fdfd6ade6..a11f3f11d862 100644 --- a/yt_dlp/extractor/iwara.py +++ b/yt_dlp/extractor/iwara.py @@ 
-1,9 +1,9 @@ import functools -import urllib.parse -import urllib.error import hashlib import json import time +import urllib.error +import urllib.parse from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py index a2bbba3979b6..8557a81ad454 100644 --- a/yt_dlp/extractor/jamendo.py +++ b/yt_dlp/extractor/jamendo.py @@ -1,8 +1,8 @@ import hashlib import random -from ..compat import compat_str from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( clean_html, int_or_none, diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py index 6c650568acda..19d2b923b592 100644 --- a/yt_dlp/extractor/japandiet.py +++ b/yt_dlp/extractor/japandiet.py @@ -1,5 +1,6 @@ import re +from .common import InfoExtractor from ..utils import ( ExtractorError, clean_html, @@ -9,9 +10,8 @@ smuggle_url, traverse_obj, try_call, - unsmuggle_url + unsmuggle_url, ) -from .common import InfoExtractor def _parse_japanese_date(text): diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py index 245fe73d4abf..8069fea4c936 100644 --- a/yt_dlp/extractor/jove.py +++ b/yt_dlp/extractor/jove.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - ExtractorError, - unified_strdate -) +from ..utils import ExtractorError, unified_strdate class JoveIE(InfoExtractor): diff --git a/yt_dlp/extractor/jstream.py b/yt_dlp/extractor/jstream.py index 3e2e6271255e..00ac7ccca31c 100644 --- a/yt_dlp/extractor/jstream.py +++ b/yt_dlp/extractor/jstream.py @@ -1,6 +1,6 @@ import base64 -import re import json +import re from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py index 43055e89debe..563aa2d729ac 100644 --- a/yt_dlp/extractor/kakao.py +++ b/yt_dlp/extractor/kakao.py @@ -3,8 +3,8 @@ from ..utils import ( ExtractorError, int_or_none, - strip_or_none, str_or_none, + strip_or_none, traverse_obj, 
unified_timestamp, ) diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index 95e2deea5bdd..4752d5a55817 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -4,18 +4,18 @@ from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_parse_qs, + compat_urlparse, ) from ..utils import ( - clean_html, ExtractorError, + clean_html, format_field, int_or_none, - unsmuggle_url, + remove_start, smuggle_url, traverse_obj, - remove_start + unsmuggle_url, ) diff --git a/yt_dlp/extractor/kankanews.py b/yt_dlp/extractor/kankanews.py index 8f247b305a8b..3d74c745c4d5 100644 --- a/yt_dlp/extractor/kankanews.py +++ b/yt_dlp/extractor/kankanews.py @@ -1,7 +1,7 @@ -import time +import hashlib import random import string -import hashlib +import time import urllib.parse from .common import InfoExtractor diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py index 3c93dedac2f9..b77667160c4a 100644 --- a/yt_dlp/extractor/kuwo.py +++ b/yt_dlp/extractor/kuwo.py @@ -3,10 +3,10 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - get_element_by_id, - clean_html, ExtractorError, InAdvancePagedList, + clean_html, + get_element_by_id, remove_start, ) diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py index 9846319e0c3d..62874195f627 100644 --- a/yt_dlp/extractor/lcp.py +++ b/yt_dlp/extractor/lcp.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .arkena import ArkenaIE +from .common import InfoExtractor class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py index 10fb5d47993a..1a3ada1e5d13 100644 --- a/yt_dlp/extractor/lecture2go.py +++ b/yt_dlp/extractor/lecture2go.py @@ -4,8 +4,8 @@ from ..utils import ( determine_ext, determine_protocol, - parse_duration, int_or_none, + parse_duration, ) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py 
index 629d208fcce3..90f0268d7b34 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -2,9 +2,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, float_or_none, int_or_none, str_or_none, diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py index 5d61a607f7cf..a113b3d0db7f 100644 --- a/yt_dlp/extractor/leeco.py +++ b/yt_dlp/extractor/leeco.py @@ -11,9 +11,9 @@ compat_urllib_parse_urlencode, ) from ..utils import ( + ExtractorError, determine_ext, encode_data_uri, - ExtractorError, int_or_none, orderedSet, parse_iso8601, diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py index b76ca090818e..297993939609 100644 --- a/yt_dlp/extractor/libraryofcongress.py +++ b/yt_dlp/extractor/libraryofcongress.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import ( determine_ext, float_or_none, diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py index 919cfcb374b2..ea150a58b0dd 100644 --- a/yt_dlp/extractor/lifenews.py +++ b/yt_dlp/extractor/lifenews.py @@ -6,8 +6,8 @@ compat_urlparse, ) from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_iso8601, remove_end, diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py index 4e50f106f974..1ff091ddb7d6 100644 --- a/yt_dlp/extractor/limelight.py +++ b/yt_dlp/extractor/limelight.py @@ -3,13 +3,13 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, determine_ext, float_or_none, int_or_none, smuggle_url, try_get, unsmuggle_url, - ExtractorError, ) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index e12f467ef51c..2a7c6f0e0366 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -7,8 +7,8 @@ extract_attributes, float_or_none, int_or_none, - srt_subtitles_timecode, 
mimetype2ext, + srt_subtitles_timecode, traverse_obj, try_get, url_or_none, diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py index fd9bba8bcb98..fa12a6a8dfed 100644 --- a/yt_dlp/extractor/mainstreaming.py +++ b/yt_dlp/extractor/mainstreaming.py @@ -1,14 +1,13 @@ import re from .common import InfoExtractor - from ..utils import ( int_or_none, js_to_json, parse_duration, traverse_obj, try_get, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py index 2792e6e7078a..44c321c26251 100644 --- a/yt_dlp/extractor/manoto.py +++ b/yt_dlp/extractor/manoto.py @@ -1,10 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - clean_html, - int_or_none, - traverse_obj -) - +from ..utils import clean_html, int_or_none, traverse_obj _API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}' diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index 675ad8ccc111..d040fb48f1f6 100644 --- a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -4,8 +4,8 @@ from ..compat import compat_str from ..utils import ( ExtractorError, - format_field, float_or_none, + format_field, int_or_none, str_or_none, traverse_obj, diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index fcc4827b5c91..c015977626f6 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -1,14 +1,11 @@ +from .common import InfoExtractor +from ..compat import compat_str, compat_urllib_parse_unquote from ..utils import ( ExtractorError, traverse_obj, unified_strdate, url_or_none, ) -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_str -) class MediaKlikkIE(InfoExtractor): diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index e04a1ce90171..b7df5c75ab21 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -5,11 +5,11 @@ from ..utils import ( 
ExtractorError, GeoRestrictedError, - int_or_none, OnDemandPagedList, + int_or_none, try_get, - urljoin, update_url_query, + urljoin, ) diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index 7ea78ab69184..d3fec4ec2b09 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -1,5 +1,5 @@ -import re import json +import re from .common import InfoExtractor from ..compat import ( @@ -10,16 +10,15 @@ ExtractorError, float_or_none, mimetype2ext, + smuggle_url, str_or_none, try_call, try_get, - smuggle_url, unsmuggle_url, url_or_none, urljoin, ) - _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})' diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py index 5f5f160876c4..f6a0b416d6e1 100644 --- a/yt_dlp/extractor/microsoftstream.py +++ b/yt_dlp/extractor/microsoftstream.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..utils import ( merge_dicts, - parse_iso8601, parse_duration, + parse_iso8601, parse_resolution, try_get, url_basename, diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index f64d575dcc81..caf60c805910 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -4,11 +4,11 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, + OnDemandPagedList, determine_ext, dict_get, - ExtractorError, float_or_none, - OnDemandPagedList, traverse_obj, ) diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py index 38cc0c2741e8..979584ed6a2e 100644 --- a/yt_dlp/extractor/mit.py +++ b/yt_dlp/extractor/mit.py @@ -1,11 +1,11 @@ -import re import json +import re from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( - clean_html, ExtractorError, + clean_html, get_element_by_id, ) diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py index a69a12e18cf5..411d41cb0a26 100644 --- a/yt_dlp/extractor/monstercat.py +++ 
b/yt_dlp/extractor/monstercat.py @@ -8,10 +8,10 @@ get_element_html_by_class, get_element_text_and_html_by_tag, int_or_none, - unified_strdate, strip_or_none, traverse_obj, try_call, + unified_strdate, ) diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py index 35c57bc70334..ed5be4fa65de 100644 --- a/yt_dlp/extractor/moviepilot.py +++ b/yt_dlp/extractor/moviepilot.py @@ -1,5 +1,5 @@ -from .dailymotion import DailymotionIE from .common import InfoExtractor +from .dailymotion import DailymotionIE class MoviepilotIE(InfoExtractor): diff --git a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py index cdd8ba4dced1..6e0ea2652a1f 100644 --- a/yt_dlp/extractor/movingimage.py +++ b/yt_dlp/extractor/movingimage.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - unescapeHTML, parse_duration, + unescapeHTML, ) diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py index 77d1806a3a43..79728e106317 100644 --- a/yt_dlp/extractor/msn.py +++ b/yt_dlp/extractor/msn.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, unescapeHTML, ) diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py index edc41443ab4f..8a8a5fec7c16 100644 --- a/yt_dlp/extractor/n1.py +++ b/yt_dlp/extractor/n1.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - unified_timestamp, extract_attributes, + unified_timestamp, ) diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index 885557e91c2c..26400e383375 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -4,8 +4,8 @@ import itertools import json import re -import urllib.parse import time +import urllib.parse from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index 81d11e3a5061..ec4d6368e442 100644 --- a/yt_dlp/extractor/nba.py +++ 
b/yt_dlp/extractor/nba.py @@ -7,9 +7,9 @@ compat_urllib_parse_unquote, ) from ..utils import ( + OnDemandPagedList, int_or_none, merge_dicts, - OnDemandPagedList, parse_duration, parse_iso8601, parse_qs, diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index 267fa8353231..e88f98abf53b 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -3,9 +3,9 @@ import re import xml.etree.ElementTree +from .adobepass import AdobePassIE from .common import InfoExtractor from .theplatform import ThePlatformIE, default_ns -from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..utils import ( diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py index 41ea3629a97b..243221d46be4 100644 --- a/yt_dlp/extractor/ndr.py +++ b/yt_dlp/extractor/ndr.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, merge_dicts, parse_iso8601, diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py index febad8fdf30a..be732a32ffa9 100644 --- a/yt_dlp/extractor/nfhsnetwork.py +++ b/yt_dlp/extractor/nfhsnetwork.py @@ -1,11 +1,5 @@ from .common import InfoExtractor - - -from ..utils import ( - try_get, - unified_strdate, - unified_timestamp -) +from ..utils import try_get, unified_strdate, unified_timestamp class NFHSNetworkIE(InfoExtractor): diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py index 2521c40e0836..64cddb408728 100644 --- a/yt_dlp/extractor/nhl.py +++ b/yt_dlp/extractor/nhl.py @@ -3,8 +3,8 @@ from ..utils import ( determine_ext, int_or_none, - parse_iso8601, parse_duration, + parse_iso8601, ) diff --git a/yt_dlp/extractor/ninenews.py b/yt_dlp/extractor/ninenews.py index 900d9ba60fb3..0b4f47b48120 100644 --- a/yt_dlp/extractor/ninenews.py +++ b/yt_dlp/extractor/ninenews.py @@ -1,5 +1,5 @@ -from .common import 
InfoExtractor from .brightcove import BrightcoveNewIE +from .common import InfoExtractor from ..utils import ExtractorError from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index c655b75f4695..b7170b0e7081 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -2,8 +2,8 @@ from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, smuggle_url, str_or_none, try_get, diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index 35d1311dcdcd..249e7cd33784 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py @@ -1,13 +1,14 @@ +import random +import re + from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( + determine_ext, parse_count, - unified_timestamp, remove_end, - determine_ext, + unified_timestamp, ) -import re -import random class NitterIE(InfoExtractor): diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py index cddc72f716e5..513529beaa6f 100644 --- a/yt_dlp/extractor/nobelprize.py +++ b/yt_dlp/extractor/nobelprize.py @@ -1,11 +1,11 @@ from .common import InfoExtractor from ..utils import ( - js_to_json, - mimetype2ext, determine_ext, - update_url_query, get_element_by_attribute, int_or_none, + js_to_json, + mimetype2ext, + update_url_query, ) diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index c7b80380362c..19cb972c0a41 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -1,11 +1,11 @@ from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote from ..utils import ( - int_or_none, find_xpath_attr, - xpath_text, + int_or_none, update_url_query, + xpath_text, ) -from ..compat import compat_urllib_parse_unquote class NozIE(InfoExtractor): diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py index ec54041f123f..5670445aa88a 100644 --- 
a/yt_dlp/extractor/nuevo.py +++ b/yt_dlp/extractor/nuevo.py @@ -1,9 +1,5 @@ from .common import InfoExtractor - -from ..utils import ( - float_or_none, - xpath_text -) +from ..utils import float_or_none, xpath_text class NuevoBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py index 6ac351cb03d9..0ef0ec70b9bd 100644 --- a/yt_dlp/extractor/nuvid.py +++ b/yt_dlp/extractor/nuvid.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - parse_duration, int_or_none, + parse_duration, strip_or_none, traverse_obj, url_or_none, diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py index 062f9a875bdd..0a12aea71482 100644 --- a/yt_dlp/extractor/nzherald.py +++ b/yt_dlp/extractor/nzherald.py @@ -3,10 +3,7 @@ from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..compat import compat_str -from ..utils import ( - ExtractorError, - traverse_obj -) +from ..utils import ExtractorError, traverse_obj class NZHeraldIE(InfoExtractor): diff --git a/yt_dlp/extractor/odkmedia.py b/yt_dlp/extractor/odkmedia.py index b852160b9f7b..8321b0741810 100644 --- a/yt_dlp/extractor/odkmedia.py +++ b/yt_dlp/extractor/odkmedia.py @@ -7,7 +7,7 @@ GeoRestrictedError, float_or_none, traverse_obj, - try_call + try_call, ) diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 61d1f404863f..5507d2fda373 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - int_or_none, - try_get -) +from ..utils import int_or_none, try_get class OlympicsReplayIE(InfoExtractor): diff --git a/yt_dlp/extractor/onenewsnz.py b/yt_dlp/extractor/onenewsnz.py index a46211e77728..351b397de72c 100644 --- a/yt_dlp/extractor/onenewsnz.py +++ b/yt_dlp/extractor/onenewsnz.py @@ -1,10 +1,6 @@ from .brightcove import BrightcoveNewIE from .common import InfoExtractor - -from ..utils import ( - ExtractorError, - 
traverse_obj -) +from ..utils import ExtractorError, traverse_obj class OneNewsNZIE(InfoExtractor): diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py index 0d59e8cb448a..da10f3779b1b 100644 --- a/yt_dlp/extractor/onet.py +++ b/yt_dlp/extractor/onet.py @@ -2,13 +2,13 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, + NO_DEFAULT, ExtractorError, + determine_ext, float_or_none, get_element_by_class, int_or_none, js_to_json, - NO_DEFAULT, parse_iso8601, remove_start, strip_or_none, diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py index 1fafd9afb40b..12bf557046bd 100644 --- a/yt_dlp/extractor/opencast.py +++ b/yt_dlp/extractor/opencast.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_iso8601, traverse_obj, diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py index 82a81c6c261e..c9a96aeb4dec 100644 --- a/yt_dlp/extractor/openrec.py +++ b/yt_dlp/extractor/openrec.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, get_first, @@ -8,7 +9,6 @@ unified_strdate, unified_timestamp, ) -from ..compat import compat_str class OpenRecBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py index d49909d528ac..0e7a8484ea1c 100644 --- a/yt_dlp/extractor/ora.py +++ b/yt_dlp/extractor/ora.py @@ -1,4 +1,5 @@ import re + from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py index 56203306fb73..3e969c84679f 100644 --- a/yt_dlp/extractor/packtpub.py +++ b/yt_dlp/extractor/packtpub.py @@ -3,13 +3,12 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, + clean_html, # remove_end, str_or_none, strip_or_none, unified_timestamp, - 
# urljoin, ) diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py index 63c5fd68f138..6b25962361ba 100644 --- a/yt_dlp/extractor/panopto.py +++ b/yt_dlp/extractor/panopto.py @@ -5,17 +5,13 @@ import random from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, - compat_urlparse -) - +from ..compat import compat_urllib_parse_urlparse, compat_urlparse from ..utils import ( - bug_reports_message, ExtractorError, + OnDemandPagedList, + bug_reports_message, get_first, int_or_none, - OnDemandPagedList, parse_qs, srt_subtitles_timecode, traverse_obj, diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py index 7e472a63e005..3f19803c012d 100644 --- a/yt_dlp/extractor/paramountplus.py +++ b/yt_dlp/extractor/paramountplus.py @@ -1,7 +1,7 @@ import itertools -from .common import InfoExtractor from .cbs import CBSBaseIE +from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index 2bb2ea9f1962..f6f5a5c3e69a 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -3,10 +3,11 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + US_RATINGS, ExtractorError, determine_ext, - int_or_none, float_or_none, + int_or_none, js_to_json, orderedSet, strip_jsonp, @@ -14,7 +15,6 @@ traverse_obj, unified_strdate, url_or_none, - US_RATINGS, ) diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py index e27e5a7bac5e..086eaaf000e1 100644 --- a/yt_dlp/extractor/pearvideo.py +++ b/yt_dlp/extractor/pearvideo.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..utils import ( qualities, - unified_timestamp, traverse_obj, + unified_timestamp, ) diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 730b2393e0a9..b7919c0734bb 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -4,6 +4,7 @@ from 
.common import InfoExtractor from ..compat import compat_str from ..utils import ( + OnDemandPagedList, format_field, int_or_none, parse_resolution, @@ -12,7 +13,6 @@ unified_timestamp, url_or_none, urljoin, - OnDemandPagedList, ) diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index 97a9bf5745b9..8870d7b9924c 100644 --- a/yt_dlp/extractor/piksel.py +++ b/yt_dlp/extractor/piksel.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - dict_get, ExtractorError, + dict_get, int_or_none, join_nonempty, parse_iso8601, diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py index d67f6005c179..c72a3876c6a2 100644 --- a/yt_dlp/extractor/pladform.py +++ b/yt_dlp/extractor/pladform.py @@ -1,11 +1,11 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_qs, - xpath_text, qualities, + xpath_text, ) diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py index 166b98c4a2b5..d978c080b354 100644 --- a/yt_dlp/extractor/platzi.py +++ b/yt_dlp/extractor/platzi.py @@ -4,8 +4,8 @@ compat_str, ) from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, str_or_none, try_get, diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py index c418f88cb1d0..a01b422901fa 100644 --- a/yt_dlp/extractor/playtvak.py +++ b/yt_dlp/extractor/playtvak.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_urllib_parse_urlencode, + compat_urlparse, ) from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py index 809b65608f2b..60c9efffea5e 100644 --- a/yt_dlp/extractor/pluralsight.py +++ b/yt_dlp/extractor/pluralsight.py @@ -10,8 +10,8 @@ compat_urlparse, ) from ..utils import ( - dict_get, ExtractorError, + dict_get, float_or_none, int_or_none, parse_duration, diff --git a/yt_dlp/extractor/polsatgo.py 
b/yt_dlp/extractor/polsatgo.py index 1cebb365e4c9..ecf2132b4ba3 100644 --- a/yt_dlp/extractor/polsatgo.py +++ b/yt_dlp/extractor/polsatgo.py @@ -3,10 +3,10 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, try_get, url_or_none, - ExtractorError, ) diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py index 51a9cf38f73f..d711d3e67d7f 100644 --- a/yt_dlp/extractor/pornflip.py +++ b/yt_dlp/extractor/pornflip.py @@ -1,9 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601 -) +from ..utils import int_or_none, parse_duration, parse_iso8601 class PornFlipIE(InfoExtractor): diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py index 2e51b4f6b25d..b8e8701a8fbf 100644 --- a/yt_dlp/extractor/pornovoisines.py +++ b/yt_dlp/extractor/pornovoisines.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, float_or_none, + int_or_none, unified_strdate, ) diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py index 5bb1832702e0..338794ed5d4f 100644 --- a/yt_dlp/extractor/prx.py +++ b/yt_dlp/extractor/prx.py @@ -1,14 +1,15 @@ import itertools + from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( - urljoin, - traverse_obj, + clean_html, int_or_none, mimetype2ext, - clean_html, - url_or_none, - unified_timestamp, str_or_none, + traverse_obj, + unified_timestamp, + url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py index 4b8e5e90de14..fc4c29e95fea 100644 --- a/yt_dlp/extractor/puhutv.py +++ b/yt_dlp/extractor/puhutv.py @@ -3,8 +3,8 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, parse_resolution, str_or_none, try_get, diff --git a/yt_dlp/extractor/qingting.py b/yt_dlp/extractor/qingting.py index aa690d492005..cb00de2d527c 100644 --- 
a/yt_dlp/extractor/qingting.py +++ b/yt_dlp/extractor/qingting.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import traverse_obj diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py index 92858259a5f5..90141e63b480 100644 --- a/yt_dlp/extractor/qqmusic.py +++ b/yt_dlp/extractor/qqmusic.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..utils import ( - clean_html, ExtractorError, + clean_html, strip_jsonp, unescapeHTML, ) diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py index 1a5a6355a693..4a09dcdfc086 100644 --- a/yt_dlp/extractor/radiocanada.py +++ b/yt_dlp/extractor/radiocanada.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, unified_strdate, ) diff --git a/yt_dlp/extractor/radiocomercial.py b/yt_dlp/extractor/radiocomercial.py index 38f8cf7865d4..0c219778fa3d 100644 --- a/yt_dlp/extractor/radiocomercial.py +++ b/yt_dlp/extractor/radiocomercial.py @@ -14,7 +14,7 @@ try_call, unified_strdate, update_url, - urljoin + urljoin, ) from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py index 67520172e8b0..632c8c281bc5 100644 --- a/yt_dlp/extractor/radiozet.py +++ b/yt_dlp/extractor/radiozet.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - traverse_obj, strip_or_none, + traverse_obj, ) diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py index 3c00183be62d..325e278fc7b7 100644 --- a/yt_dlp/extractor/radlive.py +++ b/yt_dlp/extractor/radlive.py @@ -1,13 +1,13 @@ import json +from .common import InfoExtractor from ..utils import ( ExtractorError, format_field, traverse_obj, try_get, - unified_timestamp + unified_timestamp, ) -from .common import InfoExtractor class RadLiveIE(InfoExtractor): diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py 
index c1fc65c81fd9..c2e7a6fb8f8e 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -3,11 +3,11 @@ from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( + ExtractorError, + GeoRestrictedError, clean_html, determine_ext, - ExtractorError, filter_dict, - GeoRestrictedError, int_or_none, join_nonempty, parse_duration, diff --git a/yt_dlp/extractor/rbgtum.py b/yt_dlp/extractor/rbgtum.py index 54f194cbda78..5f2d0c103416 100644 --- a/yt_dlp/extractor/rbgtum.py +++ b/yt_dlp/extractor/rbgtum.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError +from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj class RbgTumIE(InfoExtractor): diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py index 6a7c7f399160..9c382e257d23 100644 --- a/yt_dlp/extractor/rcti.py +++ b/yt_dlp/extractor/rcti.py @@ -5,11 +5,11 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - dict_get, ExtractorError, + dict_get, strip_or_none, traverse_obj, - try_get + try_get, ) diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py index 1a1c6634e325..cc76b898add4 100644 --- a/yt_dlp/extractor/rds.py +++ b/yt_dlp/extractor/rds.py @@ -1,10 +1,10 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + js_to_json, parse_duration, parse_iso8601, - js_to_json, ) -from ..compat import compat_str class RDSIE(InfoExtractor): diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py index d1de2490fc77..fac51b9efeed 100644 --- a/yt_dlp/extractor/redbulltv.py +++ b/yt_dlp/extractor/redbulltv.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - float_or_none, ExtractorError, + float_or_none, ) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 
44c0353da636..bc3e5f7eeed6 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -10,8 +10,8 @@ try_get, unescapeHTML, update_url_query, - urlencode_postdata, url_or_none, + urlencode_postdata, ) diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index f9453202b741..d0546bbfaff5 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -5,10 +5,10 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + OnDemandPagedList, int_or_none, qualities, try_get, - OnDemandPagedList, ) diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py index 965abbee8a78..14ed0edab287 100644 --- a/yt_dlp/extractor/redtube.py +++ b/yt_dlp/extractor/redtube.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, merge_dicts, str_to_int, diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py index 0a8f13b9f63f..9c9bac6af9e2 100644 --- a/yt_dlp/extractor/reuters.py +++ b/yt_dlp/extractor/reuters.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - js_to_json, int_or_none, + js_to_json, unescapeHTML, ) diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py index 8d29b302bb1c..bc59ed07e4ce 100644 --- a/yt_dlp/extractor/rmcdecouverte.py +++ b/yt_dlp/extractor/rmcdecouverte.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE +from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urlparse, diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py index 7ba80d4ba767..729804d23ec9 100644 --- a/yt_dlp/extractor/rte.py +++ b/yt_dlp/extractor/rte.py @@ -3,13 +3,13 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, float_or_none, parse_iso8601, str_or_none, try_get, unescapeHTML, url_or_none, - 
ExtractorError, ) diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py index 5928a207aef7..ec78d0a66941 100644 --- a/yt_dlp/extractor/rtp.py +++ b/yt_dlp/extractor/rtp.py @@ -1,9 +1,10 @@ -from .common import InfoExtractor -from ..utils import js_to_json -import re +import base64 import json +import re import urllib.parse -import base64 + +from .common import InfoExtractor +from ..utils import js_to_json class RTPIE(InfoExtractor): diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py index 741c472621a1..e7dcd5fd611d 100644 --- a/yt_dlp/extractor/rtvcplay.py +++ b/yt_dlp/extractor/rtvcplay.py @@ -1,16 +1,17 @@ import re -from .common import InfoExtractor, ExtractorError +from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, - int_or_none, float_or_none, + int_or_none, js_to_json, mimetype2ext, traverse_obj, - urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py index a84a78da8dce..defb8d741fa8 100644 --- a/yt_dlp/extractor/rtvs.py +++ b/yt_dlp/extractor/rtvs.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import ( parse_duration, traverse_obj, diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py index 287824d08a79..eb12f32faf9b 100644 --- a/yt_dlp/extractor/rutube.py +++ b/yt_dlp/extractor/rutube.py @@ -5,8 +5,8 @@ compat_str, ) from ..utils import ( - determine_ext, bool_or_none, + determine_ext, int_or_none, parse_qs, try_get, diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py index d7f9a73377de..726d49111afb 100644 --- a/yt_dlp/extractor/rutv.py +++ b/yt_dlp/extractor/rutv.py @@ -1,11 +1,7 @@ import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - str_to_int -) +from ..utils import ExtractorError, int_or_none, str_to_int class RUTVIE(InfoExtractor): diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index 
33f6652df565..dc61387be70d 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( - determine_ext, ExtractorError, + determine_ext, find_xpath_attr, int_or_none, traverse_obj, diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py index 8d322d7105b0..17dff0afa28a 100644 --- a/yt_dlp/extractor/safari.py +++ b/yt_dlp/extractor/safari.py @@ -2,7 +2,6 @@ import re from .common import InfoExtractor - from ..compat import ( compat_parse_qs, compat_urlparse, diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py index 3912f778658e..85d51cd59a84 100644 --- a/yt_dlp/extractor/scrippsnetworks.py +++ b/yt_dlp/extractor/scrippsnetworks.py @@ -1,8 +1,8 @@ -import json import hashlib +import json -from .aws import AWSIE from .anvato import AnvatoIE +from .aws import AWSIE from .common import InfoExtractor from ..utils import ( smuggle_url, diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py index 9c2ca8c5185c..fc91d60e179e 100644 --- a/yt_dlp/extractor/scte.py +++ b/yt_dlp/extractor/scte.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - decode_packed_codes, ExtractorError, + decode_packed_codes, urlencode_postdata, ) diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py index 1ecea71fcc25..99fcf51f1624 100644 --- a/yt_dlp/extractor/sendtonews.py +++ b/yt_dlp/extractor/sendtonews.py @@ -2,12 +2,12 @@ from .common import InfoExtractor from ..utils import ( + determine_protocol, float_or_none, - parse_iso8601, - update_url_query, int_or_none, - determine_protocol, + parse_iso8601, unescapeHTML, + update_url_query, ) diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py index 79e88858356f..b31d566dfe44 100644 --- a/yt_dlp/extractor/seznamzpravy.py +++ b/yt_dlp/extractor/seznamzpravy.py @@ -4,11 +4,11 @@ 
compat_urllib_parse_urlparse, ) from ..utils import ( - urljoin, int_or_none, parse_codecs, parse_qs, try_get, + urljoin, ) diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py index d509e8879c5d..89aee2728000 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py @@ -5,9 +5,9 @@ from .aws import AWSIE from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, InAdvancePagedList, + clean_html, int_or_none, parse_iso8601, str_or_none, diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index ec9938b8cb8b..cca86ed6c0e4 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -4,8 +4,8 @@ compat_b64decode, ) from ..utils import ( - bytes_to_intlist, ExtractorError, + bytes_to_intlist, intlist_to_bytes, unified_strdate, ) diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py index ef93b92768d8..44619a16c664 100644 --- a/yt_dlp/extractor/sixplay.py +++ b/yt_dlp/extractor/sixplay.py @@ -6,8 +6,8 @@ determine_ext, int_or_none, parse_qs, - try_get, qualities, + try_get, ) diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py index 867782778bee..234703cf705b 100644 --- a/yt_dlp/extractor/skynewsarabia.py +++ b/yt_dlp/extractor/skynewsarabia.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - parse_iso8601, parse_duration, + parse_iso8601, ) diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py index c0ff4f9aa86e..a41ad303a578 100644 --- a/yt_dlp/extractor/sohu.py +++ b/yt_dlp/extractor/sohu.py @@ -8,13 +8,13 @@ ) from ..utils import ( ExtractorError, - int_or_none, float_or_none, - url_or_none, - unified_timestamp, + int_or_none, + traverse_obj, try_get, + unified_timestamp, + url_or_none, urljoin, - traverse_obj, ) diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 493eea2a6922..773ddd34458a 100644 
--- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - try_get, - unified_timestamp -) +from ..utils import try_get, unified_timestamp class SovietsClosetBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py index 43da34a325b0..c73f7971d002 100644 --- a/yt_dlp/extractor/spankbang.py +++ b/yt_dlp/extractor/spankbang.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, merge_dicts, parse_duration, parse_resolution, diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py index a98584a27d5f..bdb8ef49682c 100644 --- a/yt_dlp/extractor/springboardplatform.py +++ b/yt_dlp/extractor/springboardplatform.py @@ -4,11 +4,11 @@ from ..utils import ( ExtractorError, int_or_none, - xpath_attr, - xpath_text, - xpath_element, unescapeHTML, unified_timestamp, + xpath_attr, + xpath_element, + xpath_text, ) diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py index bb6e8f1ea561..312a4fde080e 100644 --- a/yt_dlp/extractor/startv.py +++ b/yt_dlp/extractor/startv.py @@ -3,10 +3,10 @@ compat_str, ) from ..utils import ( - clean_html, ExtractorError, - traverse_obj, + clean_html, int_or_none, + traverse_obj, ) diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py index 2fd200f87af8..46a15e6a18f8 100644 --- a/yt_dlp/extractor/stitcher.py +++ b/yt_dlp/extractor/stitcher.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, clean_html, clean_podcast_url, - ExtractorError, int_or_none, str_or_none, try_get, diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py index 566f77782734..20a70a7bcdeb 100644 --- a/yt_dlp/extractor/storyfire.py +++ b/yt_dlp/extractor/storyfire.py @@ -2,9 +2,9 @@ from .common import InfoExtractor 
from ..utils import ( + OnDemandPagedList, format_field, int_or_none, - OnDemandPagedList, smuggle_url, ) diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py index 462861e0e089..c303ac53ac57 100644 --- a/yt_dlp/extractor/streamable.py +++ b/yt_dlp/extractor/streamable.py @@ -3,8 +3,8 @@ ExtractorError, float_or_none, int_or_none, - try_get, parse_codecs, + try_get, ) diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py index b9523c8654c4..a847925e47a4 100644 --- a/yt_dlp/extractor/stripchat.py +++ b/yt_dlp/extractor/stripchat.py @@ -3,7 +3,7 @@ ExtractorError, UserNotLive, lowercase_escape, - traverse_obj + traverse_obj, ) diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py index 708873a9566f..501156e5138e 100644 --- a/yt_dlp/extractor/sunporno.py +++ b/yt_dlp/extractor/sunporno.py @@ -2,10 +2,10 @@ from .common import InfoExtractor from ..utils import ( - parse_duration, + determine_ext, int_or_none, + parse_duration, qualities, - determine_ext, ) diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py index bd2d738423f0..29e5e573fe41 100644 --- a/yt_dlp/extractor/syfy.py +++ b/yt_dlp/extractor/syfy.py @@ -1,7 +1,7 @@ from .adobepass import AdobePassIE from ..utils import ( - update_url_query, smuggle_url, + update_url_query, ) diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index 808c6c73d34e..4e178593f4e2 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -2,8 +2,8 @@ from .turner import TurnerBaseIE from ..compat import ( - compat_urllib_parse_urlparse, compat_parse_qs, + compat_urllib_parse_urlparse, ) from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 5eac9aa3fd09..778fa1263d45 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -3,10 +3,10 @@ from .common import InfoExtractor from .wistia import WistiaIE from ..utils import ( - clean_html, 
ExtractorError, - int_or_none, + clean_html, get_element_by_class, + int_or_none, strip_or_none, urlencode_postdata, urljoin, diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py index 90a976297a19..74024099366f 100644 --- a/yt_dlp/extractor/teachertube.py +++ b/yt_dlp/extractor/teachertube.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, qualities, ) diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py index d32f81262a51..3fb899cac5a5 100644 --- a/yt_dlp/extractor/teamcoco.py +++ b/yt_dlp/extractor/teamcoco.py @@ -13,8 +13,8 @@ parse_qs, traverse_obj, unified_timestamp, - urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py index dd802db5b74f..ba25cdcf65c8 100644 --- a/yt_dlp/extractor/teamtreehouse.py +++ b/yt_dlp/extractor/teamtreehouse.py @@ -2,9 +2,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, float_or_none, get_element_by_class, get_element_by_id, diff --git a/yt_dlp/extractor/ted.py b/yt_dlp/extractor/ted.py index c28a1549876a..0969bbb03652 100644 --- a/yt_dlp/extractor/ted.py +++ b/yt_dlp/extractor/ted.py @@ -2,14 +2,13 @@ import re from .common import InfoExtractor - from ..utils import ( int_or_none, + parse_duration, str_to_int, try_get, - url_or_none, unified_strdate, - parse_duration, + url_or_none, ) diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py index 212af3785034..1705c2d5564f 100644 --- a/yt_dlp/extractor/tele13.py +++ b/yt_dlp/extractor/tele13.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( + determine_ext, js_to_json, qualities, - determine_ext, ) diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py index 5fdcddd8b328..380c84d98bf7 100644 --- a/yt_dlp/extractor/telewebion.py +++ 
b/yt_dlp/extractor/telewebion.py @@ -1,4 +1,5 @@ from __future__ import annotations + import functools import json import textwrap diff --git a/yt_dlp/extractor/tempo.py b/yt_dlp/extractor/tempo.py index 9318d6f9ad8b..71e54eb0cf1b 100644 --- a/yt_dlp/extractor/tempo.py +++ b/yt_dlp/extractor/tempo.py @@ -5,7 +5,7 @@ int_or_none, parse_iso8601, traverse_obj, - try_call + try_call, ) diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py index 6618ea4e6e0c..ae2cb483f7fa 100644 --- a/yt_dlp/extractor/tencent.py +++ b/yt_dlp/extractor/tencent.py @@ -8,8 +8,8 @@ from ..aes import aes_cbc_encrypt_bytes from ..utils import ( ExtractorError, - float_or_none, determine_ext, + float_or_none, int_or_none, js_to_json, traverse_obj, diff --git a/yt_dlp/extractor/theguardian.py b/yt_dlp/extractor/theguardian.py index a231eccf4bc7..fb6407715c56 100644 --- a/yt_dlp/extractor/theguardian.py +++ b/yt_dlp/extractor/theguardian.py @@ -10,7 +10,7 @@ parse_qs, traverse_obj, unified_strdate, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py index a991a4dfd022..99f0d42ef57e 100644 --- a/yt_dlp/extractor/theintercept.py +++ b/yt_dlp/extractor/theintercept.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - parse_iso8601, - int_or_none, ExtractorError, + int_or_none, + parse_iso8601, ) diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py index 9160f5ec6b8e..eeb33a6606db 100644 --- a/yt_dlp/extractor/theplatform.py +++ b/yt_dlp/extractor/theplatform.py @@ -1,29 +1,27 @@ -import re -import time -import hmac import binascii import hashlib +import hmac +import re +import time - -from .once import OnceIE from .adobepass import AdobePassIE -from ..networking import Request +from .once import OnceIE +from ..networking import HEADRequest, Request from ..utils import ( - determine_ext, ExtractorError, + determine_ext, + find_xpath_attr, 
float_or_none, int_or_none, - parse_qs, - unsmuggle_url, - update_url_query, - xpath_with_ns, mimetype2ext, - find_xpath_attr, + parse_qs, traverse_obj, + unsmuggle_url, update_url, + update_url_query, urlhandle_detect_ext, + xpath_with_ns, ) -from ..networking import HEADRequest default_ns = 'http://www.w3.org/2005/SMIL21/Language' _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py index 7841f8da69e2..f7a13d2c3732 100644 --- a/yt_dlp/extractor/threeqsdn.py +++ b/yt_dlp/extractor/threeqsdn.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, float_or_none, int_or_none, join_nonempty, diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py index aa7ee6c489e7..ccb2ef8166df 100644 --- a/yt_dlp/extractor/toypics.py +++ b/yt_dlp/extractor/toypics.py @@ -1,6 +1,7 @@ -from .common import InfoExtractor import re +from .common import InfoExtractor + class ToypicsIE(InfoExtractor): _WORKING = False diff --git a/yt_dlp/extractor/triller.py b/yt_dlp/extractor/triller.py index 56e51fea8f30..3bdeedd43e00 100644 --- a/yt_dlp/extractor/triller.py +++ b/yt_dlp/extractor/triller.py @@ -14,8 +14,8 @@ traverse_obj, unified_timestamp, url_basename, - urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py index 86f0990e8328..efedac180c4f 100644 --- a/yt_dlp/extractor/trueid.py +++ b/yt_dlp/extractor/trueid.py @@ -1,13 +1,13 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_age_limit, traverse_obj, unified_timestamp, - url_or_none + url_or_none, ) diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py index a26bdcaae76e..f2d0c59011dd 100644 --- a/yt_dlp/extractor/tumblr.py +++ 
b/yt_dlp/extractor/tumblr.py @@ -3,7 +3,7 @@ ExtractorError, int_or_none, traverse_obj, - urlencode_postdata + urlencode_postdata, ) diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 630d84bdc39f..b27db87bf7f0 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -3,17 +3,17 @@ from .adobepass import AdobePassIE from ..compat import compat_str from ..utils import ( - fix_xml_ampersands, - xpath_text, - int_or_none, + ExtractorError, determine_ext, + fix_xml_ampersands, float_or_none, + int_or_none, parse_duration, - xpath_attr, - update_url_query, - ExtractorError, strip_or_none, + update_url_query, url_or_none, + xpath_attr, + xpath_text, ) diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index 7756aa3f587f..9b19e79954d2 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -3,10 +3,10 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, - int_or_none, + determine_ext, float_or_none, + int_or_none, js_to_json, parse_iso8601, remove_end, diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py index 9c0a111c0514..cd35ff5fbbca 100644 --- a/yt_dlp/extractor/tv2hu.py +++ b/yt_dlp/extractor/tv2hu.py @@ -1,8 +1,8 @@ # encoding: utf-8 from .common import InfoExtractor from ..utils import ( - traverse_obj, UnsupportedError, + traverse_obj, ) diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py index b9f5e110eb54..dbebda4f4ecd 100644 --- a/yt_dlp/extractor/tvanouvelles.py +++ b/yt_dlp/extractor/tvanouvelles.py @@ -1,7 +1,7 @@ import re -from .common import InfoExtractor from .brightcove import BrightcoveNewIE +from .common import InfoExtractor class TVANouvellesIE(InfoExtractor): diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py index 5276813155b7..ac480580a689 100644 --- a/yt_dlp/extractor/tvn24.py +++ b/yt_dlp/extractor/tvn24.py @@ -1,7 +1,7 @@ from .common 
import InfoExtractor from ..utils import ( - int_or_none, NO_DEFAULT, + int_or_none, unescapeHTML, ) diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index a8d00e243ad4..f1ebf027a08a 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -4,10 +4,10 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, dict_get, - ExtractorError, int_or_none, js_to_json, str_or_none, diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index 48a6efe1ccb7..29185d34bc6e 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -4,8 +4,8 @@ from ..compat import compat_urlparse from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_iso8601, qualities, diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py index 228c2366ed66..d43bdc2ff1a8 100644 --- a/yt_dlp/extractor/tvplayer.py +++ b/yt_dlp/extractor/tvplayer.py @@ -2,10 +2,10 @@ from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, extract_attributes, try_get, urlencode_postdata, - ExtractorError, ) diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py index e8e1fc666fdf..9249550c970d 100644 --- a/yt_dlp/extractor/tweakers.py +++ b/yt_dlp/extractor/tweakers.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, determine_ext, + int_or_none, mimetype2ext, ) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index fc80dade8f12..1a11162a0b13 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1,10 +1,10 @@ +import functools import json import random import re from .common import InfoExtractor from .periscope import PeriscopeBaseIE, PeriscopeIE -from ..compat import functools # isort: split from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, diff 
--git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py index 10668ac4b8ca..d5849d29bf89 100644 --- a/yt_dlp/extractor/udn.py +++ b/yt_dlp/extractor/udn.py @@ -1,12 +1,12 @@ import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( determine_ext, int_or_none, js_to_json, ) -from ..compat import compat_urlparse class UDNEmbedIE(InfoExtractor): diff --git a/yt_dlp/extractor/ukcolumn.py b/yt_dlp/extractor/ukcolumn.py index f914613c0931..f141804c801e 100644 --- a/yt_dlp/extractor/ukcolumn.py +++ b/yt_dlp/extractor/ukcolumn.py @@ -1,11 +1,11 @@ +from .common import InfoExtractor +from .vimeo import VimeoIE +from .youtube import YoutubeIE from ..utils import ( + ExtractorError, unescapeHTML, urljoin, - ExtractorError, ) -from .common import InfoExtractor -from .vimeo import VimeoIE -from .youtube import YoutubeIE class UkColumnIE(InfoExtractor): diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index 7f97fc95f50b..928e6e1c2d66 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..utils import ( - dict_get, ExtractorError, - int_or_none, ISO639Utils, + dict_get, + int_or_none, parse_age_limit, try_get, unified_timestamp, diff --git a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py index 3243f3e3bdff..42a28c509e7d 100644 --- a/yt_dlp/extractor/usatoday.py +++ b/yt_dlp/extractor/usatoday.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, get_element_by_attribute, @@ -6,7 +7,6 @@ try_get, update_url_query, ) -from ..compat import compat_str class USATodayIE(InfoExtractor): diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py index 5df2416537cf..046e3d768c53 100644 --- a/yt_dlp/extractor/ustream.py +++ b/yt_dlp/extractor/ustream.py @@ -7,10 +7,10 @@ compat_urlparse, ) from ..utils import ( - encode_data_uri, ExtractorError, - 
int_or_none, + encode_data_uri, float_or_none, + int_or_none, join_nonempty, mimetype2ext, str_or_none, diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py index c3aeeb9615c6..f6ce5b3577d7 100644 --- a/yt_dlp/extractor/ustudio.py +++ b/yt_dlp/extractor/ustudio.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..utils import ( int_or_none, - unified_strdate, unescapeHTML, + unified_strdate, ) diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py index ef44d421ec6f..205f8ea63e90 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( int_or_none, mimetype2ext, diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py index 3f2dddbe90c2..a2e90226afa2 100644 --- a/yt_dlp/extractor/vesti.py +++ b/yt_dlp/extractor/vesti.py @@ -1,8 +1,8 @@ import re from .common import InfoExtractor -from ..utils import ExtractorError from .rutv import RUTVIE +from ..utils import ExtractorError class VestiIE(InfoExtractor): diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py index aa40227a7657..7715d6839265 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -1,5 +1,5 @@ -import re import json +import re from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py index d31908fb1264..b072d9d739c5 100644 --- a/yt_dlp/extractor/vice.py +++ b/yt_dlp/extractor/vice.py @@ -10,10 +10,10 @@ from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, - int_or_none, OnDemandPagedList, + clean_html, + int_or_none, parse_age_limit, str_or_none, try_get, diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 770aa284da95..6322bb04b620 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - 
clean_html, ExtractorError, + clean_html, format_field, get_element_by_class, int_or_none, diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index 44353b7fc45a..e1219a8a0d6a 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( - format_field, float_or_none, + format_field, get_element_by_id, int_or_none, str_to_int, diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 91b976403a75..ac96ade18695 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1,21 +1,21 @@ import base64 import functools -import re import itertools +import re from .common import InfoExtractor from ..compat import compat_str, compat_urlparse from ..networking import HEADRequest, Request from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + OnDemandPagedList, clean_html, determine_ext, - ExtractorError, get_element_by_class, - js_to_json, int_or_none, + js_to_json, merge_dicts, - OnDemandPagedList, parse_filesize, parse_iso8601, parse_qs, @@ -26,8 +26,8 @@ unified_timestamp, unsmuggle_url, urlencode_postdata, - urljoin, urlhandle_detect_ext, + urljoin, ) diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index 6f9af9f643b5..480f49b7b17b 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -1,8 +1,8 @@ -import re import json -import uuid import random +import re import urllib.parse +import uuid from .common import InfoExtractor from ..compat import compat_str @@ -10,10 +10,10 @@ ExtractorError, int_or_none, remove_end, + smuggle_url, strip_or_none, traverse_obj, try_get, - smuggle_url, unified_timestamp, unsmuggle_url, url_or_none, diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 28d5026850b7..132d65bcaee5 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -20,6 +20,7 @@ parse_resolution, str_or_none, str_to_int, + 
traverse_obj, try_call, unescapeHTML, unified_timestamp, @@ -27,7 +28,6 @@ url_or_none, urlencode_postdata, urljoin, - traverse_obj, ) diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py index a1a9c1708c4a..3ac0f83874a0 100644 --- a/yt_dlp/extractor/walla.py +++ b/yt_dlp/extractor/walla.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - xpath_text, int_or_none, + xpath_text, ) diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py index 74501b1d2913..1cfed2da5e71 100644 --- a/yt_dlp/extractor/washingtonpost.py +++ b/yt_dlp/extractor/washingtonpost.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import traverse_obj diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index f80f140edc0a..0b7ddd239b12 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -6,16 +6,16 @@ compat_urlparse, ) from ..utils import ( + ExtractorError, determine_ext, dict_get, - ExtractorError, js_to_json, strip_jsonp, try_get, unified_strdate, update_url_query, - urlhandle_detect_ext, url_or_none, + urlhandle_detect_ext, ) diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index 2fca745aa5cb..b6a6593850d2 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -1,6 +1,6 @@ +import itertools import json import random -import itertools import urllib.parse from .common import InfoExtractor diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py index f2808cd9fc83..492891d7828f 100644 --- a/yt_dlp/extractor/whowatch.py +++ b/yt_dlp/extractor/whowatch.py @@ -1,12 +1,12 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + ExtractorError, int_or_none, qualities, try_call, try_get, - ExtractorError, ) -from ..compat import compat_str class WhoWatchIE(InfoExtractor): diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py index f9bf092df5de..d7d77c0db0c0 100644 --- 
a/yt_dlp/extractor/wimtv.py +++ b/yt_dlp/extractor/wimtv.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, parse_duration, urlencode_postdata, - ExtractorError, ) diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py index 5e590e2f4f5e..0ef4e8e53751 100644 --- a/yt_dlp/extractor/wppilot.py +++ b/yt_dlp/extractor/wppilot.py @@ -1,13 +1,13 @@ +import json +import random +import re + from .common import InfoExtractor from ..utils import ( - try_get, ExtractorError, + try_get, ) -import json -import random -import re - class WPPilotBaseIE(InfoExtractor): _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s' diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py index 86e2646793b1..35fe3036272c 100644 --- a/yt_dlp/extractor/wsj.py +++ b/yt_dlp/extractor/wsj.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, float_or_none, + int_or_none, unified_strdate, ) diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 01ac5ddb65ed..0b3a620ec20c 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -4,11 +4,11 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, clean_html, determine_ext, dict_get, extract_attributes, - ExtractorError, float_or_none, int_or_none, parse_duration, diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py index 1452aaec3714..74d4f041900e 100644 --- a/yt_dlp/extractor/xnxx.py +++ b/yt_dlp/extractor/xnxx.py @@ -2,9 +2,9 @@ from .common import InfoExtractor from ..utils import ( + NO_DEFAULT, determine_ext, int_or_none, - NO_DEFAULT, str_to_int, ) diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py index 8dd1cd9efbee..322e86570f91 100644 --- a/yt_dlp/extractor/xstream.py +++ b/yt_dlp/extractor/xstream.py @@ -2,11 +2,11 @@ from .common import InfoExtractor from ..utils import ( + find_xpath_attr, 
int_or_none, parse_iso8601, - xpath_with_ns, xpath_text, - find_xpath_attr, + xpath_with_ns, ) diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index a489033abc81..6b16ac2915c0 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -3,9 +3,9 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, int_or_none, parse_duration, ) diff --git a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py index e3e3a9fe6374..aa6c84d09eaf 100644 --- a/yt_dlp/extractor/xxxymovies.py +++ b/yt_dlp/extractor/xxxymovies.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - parse_duration, int_or_none, + parse_duration, ) diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py index 794dc3eaea2c..acfe69bf45ac 100644 --- a/yt_dlp/extractor/yandexmusic.py +++ b/yt_dlp/extractor/yandexmusic.py @@ -5,8 +5,8 @@ from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, try_get, ) diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py index 88f526bbc9b3..2a12aa509534 100644 --- a/yt_dlp/extractor/zapiks.py +++ b/yt_dlp/extractor/zapiks.py @@ -2,11 +2,11 @@ from .common import InfoExtractor from ..utils import ( + int_or_none, parse_duration, parse_iso8601, - xpath_with_ns, xpath_text, - int_or_none, + xpath_with_ns, ) diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py index c24b33874c4c..18b22a5c7d4e 100644 --- a/yt_dlp/extractor/zhihu.py +++ b/yt_dlp/extractor/zhihu.py @@ -1,5 +1,5 @@ from .common import InfoExtractor -from ..utils import format_field, float_or_none, int_or_none +from ..utils import float_or_none, format_field, int_or_none class ZhihuIE(InfoExtractor): diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index ff5eac89a345..909a7a3ae636 100644 --- 
a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -10,8 +10,8 @@ int_or_none, join_nonempty, try_call, + url_or_none, urljoin, - url_or_none ) from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py index 2f3b4c47f5f6..8d3156d644a0 100644 --- a/yt_dlp/extractor/zype.py +++ b/yt_dlp/extractor/zype.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - dict_get, ExtractorError, + dict_get, int_or_none, js_to_json, parse_iso8601, From a4da9db87b6486b270c15dfa07ab5bfedc83f6bd Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Sun, 26 May 2024 23:09:53 +0200 Subject: [PATCH 028/145] Update to ytdl-commit-a08f2b7 (#10012) [ie] Rework JWPlayer extraction - https://github.com/ytdl-org/youtube-dl/commit/f66372403fd9e1661199fea100ba2600fa9697b2 [ie/gbnews] Add extractor - https://github.com/ytdl-org/youtube-dl/commit/70f230f9cf28e948662599b6257cb7d1262870e3 [ie/caffeinetv] Add extractor - https://github.com/ytdl-org/youtube-dl/commit/40bd5c18153afe765caa6726302ee1dd8a9a2ce6 [ie/youporn] Improve extraction - https://github.com/ytdl-org/youtube-dl/commit/0b2ce3685e02ea1a3ccee1026572e081b8f6ac83 [ie/youporn] Add playlist extractors - https://github.com/ytdl-org/youtube-dl/commit/668332b9733023ca2e927eeb2208725022248af8 Closes #9188, Closes #9523 Authored by: Grub4K, bashonly --- README.md | 2 +- yt_dlp/extractor/_extractors.py | 12 +- yt_dlp/extractor/caffeinetv.py | 74 ++++++ yt_dlp/extractor/common.py | 47 ++-- yt_dlp/extractor/gbnews.py | 107 +++++++++ yt_dlp/extractor/youporn.py | 391 +++++++++++++++++++++++++++++++- 6 files changed, 588 insertions(+), 45 deletions(-) create mode 100644 yt_dlp/extractor/caffeinetv.py create mode 100644 yt_dlp/extractor/gbnews.py diff --git a/README.md b/README.md index 0636d2f6e7ba..5965d600ed09 100644 --- a/README.md +++ b/README.md @@ -2123,7 +2123,7 @@ with 
yt_dlp.YoutubeDL(ydl_opts) as ydl: ### New features -* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@be008e6**](https://github.com/ytdl-org/youtube-dl/commit/be008e657d79832642e2158557c899249c9e31cd) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) +* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@a08f2b7**](https://github.com/ytdl-org/youtube-dl/commit/a08f2b7e4567cdc50c0614ee0a4ffdff49b8b6e6) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e287e04bc196..37e6fc318e4c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -308,6 +308,7 @@ from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE +from .caffeinetv import CaffeineTVIE from .callin import CallinIE from .caltrans import CaltransIE from .cam4 import CAM4IE @@ -720,6 +721,7 @@ from .gamestar import GameStarIE from .gaskrank import GaskrankIE from .gazeta import GazetaIE +from .gbnews import GBNewsIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE @@ -2501,7 +2503,15 @@ YouNowLiveIE, YouNowMomentIE, ) -from .youporn import YouPornIE +from .youporn import ( + YouPornCategoryIE, + YouPornChannelIE, + YouPornCollectionIE, + YouPornIE, + YouPornStarIE, + YouPornTagIE, + YouPornVideosIE, +) from .zaiko import ( ZaikoETicketIE, ZaikoIE, diff --git a/yt_dlp/extractor/caffeinetv.py b/yt_dlp/extractor/caffeinetv.py new file mode 100644 index 000000000000..aa107f858581 --- /dev/null +++ b/yt_dlp/extractor/caffeinetv.py @@ 
-0,0 +1,74 @@ +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + parse_iso8601, + traverse_obj, + urljoin, +) + + +class CaffeineTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)' + _TESTS = [{ + 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e', + 'info_dict': { + 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e', + 'ext': 'mp4', + 'title': 'GOOOOD MORNINNNNN #highlights', + 'timestamp': 1654702180, + 'upload_date': '20220608', + 'uploader': 'RahJON Wicc', + 'uploader_id': 'TsuSurf', + 'duration': 3145, + 'age_limit': 17, + 'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'tags': ['highlights', 'battlerap'], + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json( + f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id) + broadcast_info = traverse_obj(json_data, ('broadcast_info', {dict})) or {} + + video_url = broadcast_info['video_url'] + ext = determine_ext(video_url) + if ext == 'm3u8': + formats = self._extract_m3u8_formats(video_url, video_id, 'mp4') + else: + formats = [{'url': video_url}] + + return { + 'id': video_id, + 'formats': formats, + **traverse_obj(json_data, { + 'like_count': ('like_count', {int_or_none}), + 'view_count': ('view_count', {int_or_none}), + 'comment_count': ('comment_count', {int_or_none}), + 'tags': ('tags', ..., {str}, {lambda x: x or None}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': (((None, 'user'), 'username'), {str}, any), + 'is_live': ('is_live', {bool}), + }), + **traverse_obj(broadcast_info, { + 'title': ('broadcast_title', {str}), + 'duration': ('content_duration', {int_or_none}), + 'timestamp': ('broadcast_start_time', {parse_iso8601}), + 'thumbnail': 
('preview_image_path', {lambda x: urljoin(url, x)}), + }), + 'age_limit': { + # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system + 'FOUR_PLUS': 0, + 'NINE_PLUS': 9, + 'TWELVE_PLUS': 12, + 'SEVENTEEN_PLUS': 17, + }.get(broadcast_info.get('content_rating'), 17), + } diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a33cef354e08..38daad72efc6 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3384,23 +3384,16 @@ def manifest_url(manifest): return formats def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): - mobj = re.search( - r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''', - webpage) - if mobj: - try: - jwplayer_data = self._parse_json(mobj.group('options'), - video_id=video_id, - transform_source=transform_source) - except ExtractorError: - pass - else: - if isinstance(jwplayer_data, dict): - return jwplayer_data + return self._search_json( + r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''', + webpage, 'JWPlayer data', video_id, + # must be a {...} or sequence, ending + contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))', + transform_source=transform_source, default=None) - def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): + def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs): jwplayer_data = self._find_jwplayer_data( - webpage, video_id, transform_source=js_to_json) + webpage, video_id, transform_source=transform_source) return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) @@ -3432,22 +3425,14 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) subtitles = {} 
- tracks = video_data.get('tracks') - if tracks and isinstance(tracks, list): - for track in tracks: - if not isinstance(track, dict): - continue - track_kind = track.get('kind') - if not track_kind or not isinstance(track_kind, str): - continue - if track_kind.lower() not in ('captions', 'subtitles'): - continue - track_url = urljoin(base_url, track.get('file')) - if not track_url: - continue - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track_url) - }) + for track in traverse_obj(video_data, ( + 'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))): + track_url = urljoin(base_url, track.get('file')) + if not track_url: + continue + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track_url) + }) entry = { 'id': this_video_id, diff --git a/yt_dlp/extractor/gbnews.py b/yt_dlp/extractor/gbnews.py new file mode 100644 index 000000000000..bb1554eea429 --- /dev/null +++ b/yt_dlp/extractor/gbnews.py @@ -0,0 +1,107 @@ +import functools + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + extract_attributes, + get_elements_html_by_class, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class GBNewsIE(InfoExtractor): + IE_DESC = 'GB News clips, features and live streams' + _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)' + + _PLATFORM = 'safari' + _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php' + _TESTS = [{ + 'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row', + 'info_dict': { + 'id': '52264136', + 'ext': 'mp4', + 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)', + 'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row', + 'description': 'The post was criticised by former employers of the broadcaster', + 'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism', + }, + }, { 
+ 'url': 'https://www.gbnews.com/royal/prince-harry-in-love-with-kate-meghan-markle-jealous-royal', + 'info_dict': { + 'id': '52328390', + 'ext': 'mp4', + 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)', + 'display_id': 'prince-harry-in-love-with-kate-meghan-markle-jealous-royal', + 'description': 'Ingrid Seward has published 17 books documenting the highs and lows of the Royal Family', + 'title': 'Royal author claims Prince Harry was \'in love\' with Kate - Meghan was \'jealous\'', + } + }, { + 'url': 'https://www.gbnews.uk/watchlive', + 'info_dict': { + 'id': '1069', + 'ext': 'mp4', + 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)', + 'display_id': 'watchlive', + 'live_status': 'is_live', + 'title': r're:^GB News Live', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + @functools.lru_cache + def _get_ss_endpoint(self, data_id, data_env): + if not data_id: + data_id = 'GB003' + if not data_env: + data_env = 'production' + + json_data = self._download_json( + self._SSMP_URL, None, 'Downloading Simplestream JSON metadata', query={ + 'id': data_id, + 'env': data_env, + }) + meta_url = traverse_obj(json_data, ('response', 'api_hostname', {url_or_none})) + if not meta_url: + raise ExtractorError('No API host found') + + return meta_url + + def _real_extract(self, url): + display_id = self._match_id(url).rpartition('/')[2] + webpage = self._download_webpage(url, display_id) + + video_data = None + elements = get_elements_html_by_class('simplestream', webpage) + for html_tag in elements: + attributes = extract_attributes(html_tag) + if 'sidebar' not in (attributes.get('class') or ''): + video_data = attributes + if not video_data: + raise ExtractorError('Could not find video element', expected=True) + + endpoint_url = self._get_ss_endpoint(video_data.get('data-id'), video_data.get('data-env')) + + uvid = video_data['data-uvid'] + video_type = video_data.get('data-type') + if not video_type or video_type == 'vod': + video_type = 
'show' + stream_data = self._download_json( + f'{endpoint_url}/api/{video_type}/stream/{uvid}', + uvid, 'Downloading stream JSON', query={ + 'key': video_data.get('data-key'), + 'platform': self._PLATFORM, + }) + if traverse_obj(stream_data, 'drm'): + self.report_drm(uvid) + + return { + 'id': uvid, + 'display_id': display_id, + 'title': self._og_search_title(webpage, default=None), + 'description': self._og_search_description(webpage, default=None), + 'formats': self._extract_m3u8_formats(traverse_obj(stream_data, ( + 'response', 'stream', {url_or_none})), uvid, 'mp4'), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'is_live': video_type == 'live', + } diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 6d4e31bf34b6..0e047aa16191 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -1,19 +1,27 @@ +import itertools import re from .common import InfoExtractor from ..utils import ( + ExtractorError, + clean_html, extract_attributes, + get_element_by_class, + get_element_by_id, + get_elements_html_by_class, int_or_none, merge_dicts, - str_to_int, + parse_count, + parse_qs, traverse_obj, unified_strdate, url_or_none, + urljoin, ) class YouPornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' 
+ _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?/?(?:[#?]|$)' _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)'] _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', @@ -34,7 +42,7 @@ class YouPornIE(InfoExtractor): 'tags': list, 'age_limit': 18, }, - 'skip': 'This video has been disabled', + 'skip': 'This video has been deactivated', }, { # Unknown uploader 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', @@ -72,7 +80,6 @@ class YouPornIE(InfoExtractor): 'id': '16290308', 'age_limit': 18, 'categories': [], - 'description': str, # TODO: detect/remove SEO spam description in ytdl backport 'display_id': 'tinderspecial-trailer1', 'duration': 298.0, 'ext': 'mp4', @@ -90,7 +97,17 @@ def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).group('id', 'display_id') self._set_cookie('.youporn.com', 'age_verified', '1') webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id) - definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions'] + + watchable = self._search_regex( + r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''', + webpage, 'watchability', default=None) + if not watchable: + msg = re.split(r'\s{2}', clean_html(get_element_by_id('mainContent', webpage)) or '')[0] + raise ExtractorError( + f'{self.IE_NAME} says: {msg}' if msg else 'Video unavailable', expected=True) + + player_vars = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id) + definitions = player_vars['mediaDefinitions'] def get_format_data(data, stream_type): info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any)) @@ -143,8 +160,10 @@ def get_format_data(data, stream_type): 
thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail') - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'duration', fatal=False)) + duration = traverse_obj(player_vars, ('duration', {int_or_none})) + if duration is None: + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration', fatal=False)) uploader = self._html_search_regex( r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', @@ -160,11 +179,11 @@ def get_format_data(data, stream_type): view_count = None views = self._search_regex( - r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage, - 'views', default=None) + r'(<div [^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>', + webpage, 'views', default=None) if views: - view_count = str_to_int(extract_attributes(views).get('data-value')) - comment_count = str_to_int(self._search_regex( + view_count = parse_count(extract_attributes(views).get('data-value')) + comment_count = parse_count(self._search_regex( r'>All [Cc]omments? 
\(([\d,.]+)\)', webpage, 'comment count', default=None)) @@ -182,7 +201,8 @@ def extract_tag_box(regex, title): data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) data.pop('url', None) - return merge_dicts(data, { + + result = merge_dicts(data, { 'id': video_id, 'display_id': display_id, 'title': title, @@ -198,3 +218,350 @@ def extract_tag_box(regex, title): 'age_limit': age_limit, 'formats': formats, }) + + # Remove SEO spam "description" + description = result.get('description') + if description and description.startswith(f'Watch {result.get("title")} online'): + del result['description'] + + return result + + +class YouPornListBase(InfoExtractor): + def _get_next_url(self, url, pl_id, html): + return urljoin(url, self._search_regex( + r'''<a [^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''', + get_element_by_id('next', html) or '', 'next page', + group='url', default=None)) + + @classmethod + def _get_title_from_slug(cls, title_slug): + return re.sub(r'[_-]', ' ', title_slug) + + def _entries(self, url, pl_id, html=None, page_num=None): + start = page_num or 1 + for page in itertools.count(start): + if not html: + html = self._download_webpage( + url, pl_id, note=f'Downloading page {page}', fatal=page == start) + if not html: + return + for element in get_elements_html_by_class('video-title', html): + if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})): + yield self.url_result(video_url) + + if page_num is not None: + return + next_url = self._get_next_url(url, pl_id, html) + if not next_url or next_url == url: + return + url = next_url + html = None + + def _real_extract(self, url, html=None): + m_dict = self._match_valid_url(url).groupdict() + pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort')) + qs = {k: v[-1] for k, v in parse_qs(url).items() if v} + + base_id = pl_id or 'YouPorn' + title = self._get_title_from_slug(base_id) + if page_type: + title 
= f'{page_type.capitalize()} {title}' + base_id = [base_id.lower()] + if sort is None: + title += ' videos' + else: + title = f'{title} videos by {re.sub(r"[_-]", " ", sort)}' + base_id.append(sort) + if qs: + filters = list(map('='.join, sorted(qs.items()))) + title += f' ({",".join(filters)})' + base_id.extend(filters) + pl_id = '/'.join(base_id) + + return self.playlist_result( + self._entries(url, pl_id, html=html, page_num=int_or_none(qs.get('page'))), + playlist_id=pl_id, playlist_title=title) + + +class YouPornCategoryIE(YouPornListBase): + IE_DESC = 'YouPorn category, with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>category)/(?P<id>[^/?#&]+) + (?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/category/popular-with-women/popular/', + 'info_dict': { + 'id': 'popular-with-women/popular', + 'title': 'Category popular with women videos by popular', + }, + 'playlist_mincount': 39, + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/category/popular-with-women/duration/?min_minutes=10', + 'info_dict': { + 'id': 'popular-with-women/duration/min_minutes=10', + 'title': 'Category popular with women videos by duration (min_minutes=10)', + }, + 'playlist_mincount': 2, + # 'playlist_maxcount': 30, + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/category/popular-with-women/popular?page=1', + 'info_dict': { + 'id': 'popular-with-women/popular/page=1', + 'title': 'Category popular with women videos by popular (page=1)', + }, + 'playlist_count': 36, + }] + + +class YouPornChannelIE(YouPornListBase): + IE_DESC = 'YouPorn channel, with sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>channel)/(?P<id>[^/?#&]+) + (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 
'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/channel/x-feeds/', + 'info_dict': { + 'id': 'x-feeds', + 'title': 'Channel X-Feeds videos', + }, + 'playlist_mincount': 37, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1', + 'info_dict': { + 'id': 'x-feeds/duration/page=1', + 'title': 'Channel X-Feeds videos by duration (page=1)', + }, + 'playlist_count': 24, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return re.sub(r'_', ' ', title_slug).title() + + +class YouPornCollectionIE(YouPornListBase): + IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>collection)s/videos/(?P<id>\d+) + (?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/collections/videos/33044251/', + 'info_dict': { + 'id': '33044251', + 'title': 'Collection Sexy Lips videos', + 'uploader': 'ph-littlewillyb', + }, + 'playlist_mincount': 50, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1', + 'info_dict': { + 'id': '33044251/time/page=1', + 'title': 'Collection Sexy Lips videos by time (page=1)', + 'uploader': 'ph-littlewillyb', + }, + 'playlist_count': 20, + }] + + def _real_extract(self, url): + pl_id = self._match_id(url) + html = self._download_webpage(url, pl_id) + playlist = super()._real_extract(url, html=html) + infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class( + 'collection-infos', html)) or '') + title, uploader = self._search_regex( + r'^\s*Collection: (?P<title>.+?) 
\d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)', + infos, 'title/uploader', group=('title', 'uploader'), default=(None, None)) + if title: + playlist.update({ + 'title': playlist['title'].replace(playlist['id'].split('/')[0], title), + 'uploader': uploader, + }) + + return playlist + + +class YouPornTagIE(YouPornListBase): + IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + porn(?P<type>tag)s/(?P<id>[^/?#&]+) + (?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/porntags/austrian', + 'info_dict': { + 'id': 'austrian', + 'title': 'Tag austrian videos', + }, + 'playlist_mincount': 33, + 'expected_warnings': ['YouPorn tag pages are not correctly cached'], + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10', + 'info_dict': { + 'id': 'austrian/duration/min_minutes=10', + 'title': 'Tag austrian videos by duration (min_minutes=10)', + }, + 'playlist_mincount': 10, + # number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3, + # or more, varying with number of ads; let's set max as 9x4 + # NB col 1 may not be shown in non-JS page with site CSS and zoom 100% + # 'playlist_maxcount': 32, + 'expected_warnings': ['YouPorn tag pages are not correctly cached'], + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/porntags/austrian/?page=1', + 'info_dict': { + 'id': 'austrian/page=1', + 'title': 'Tag austrian videos (page=1)', + }, + 'playlist_mincount': 32, + # 'playlist_maxcount': 34, + 'expected_warnings': ['YouPorn tag pages are not correctly cached'], + }] + + def _real_extract(self, url): + self.report_warning( + 'YouPorn tag pages are not correctly cached and ' + 'often return incorrect results', only_once=True) + return 
super()._real_extract(url) + + +class YouPornStarIE(YouPornListBase): + IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>pornstar)/(?P<id>[^/?#&]+) + (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/pornstar/daynia/', + 'info_dict': { + 'id': 'daynia', + 'title': 'Pornstar Daynia videos', + 'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+', + }, + 'playlist_mincount': 40, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/pornstar/daynia/?page=1', + 'info_dict': { + 'id': 'daynia/page=1', + 'title': 'Pornstar Daynia videos (page=1)', + 'description': 're:.{180,}', + }, + 'playlist_count': 26, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return re.sub(r'_', ' ', title_slug).title() + + def _real_extract(self, url): + pl_id = self._match_id(url) + html = self._download_webpage(url, pl_id) + playlist = super()._real_extract(url, html=html) + INFO_ELEMENT_RE = r'''(?x) + <div [^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*> + (?P<info>[\s\S]+?)(?:</div>\s*){6,} + ''' + + if infos := self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default=''): + infos = re.sub( + r'(?:\s*nl=nl)+\s*', ' ', + re.sub(r'(?u)\s+', ' ', clean_html(re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '') + + return { + **playlist, + 'description': infos.strip() or None, + } + + +class YouPornVideosIE(YouPornListBase): + IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?:(?P<id>browse)/)? 
+ (?P<sort>(?(id) + (?:duration|rating|time|views)| + (?:most_(?:favou?rit|view)ed|recommended|top_rated)?)) + (?:[/#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/', + 'info_dict': { + 'id': 'youporn', + 'title': 'YouPorn videos', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/recommended', + 'info_dict': { + 'id': 'youporn/recommended', + 'title': 'YouPorn videos by recommended', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/top_rated', + 'info_dict': { + 'id': 'youporn/top_rated', + 'title': 'YouPorn videos by top rated', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/browse/time', + 'info_dict': { + 'id': 'browse/time', + 'title': 'YouPorn videos by time', + }, + 'only_matching': True, + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2', + 'info_dict': { + 'id': 'youporn/most_favorited/max_minutes=2/res=VR', + 'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)', + }, + 'playlist_mincount': 10, + # 'playlist_maxcount': 28, + }, { + 'note': 'Filtered paginated list with several pages', + 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5', + 'info_dict': { + 'id': 'youporn/most_favorited/max_minutes=5/res=VR', + 'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)', + }, + 'playlist_mincount': 45, + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/browse/time?page=1', + 'info_dict': { + 'id': 'browse/time/page=1', + 'title': 'YouPorn videos by time (page=1)', + }, + 'playlist_count': 36, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return 'YouPorn' if title_slug == 
'browse' else title_slug From 96a134dea6397a5f2131947c427aac52c8b4e677 Mon Sep 17 00:00:00 2001 From: coletdjnz <coletdjnz@protonmail.com> Date: Mon, 27 May 2024 09:13:12 +1200 Subject: [PATCH 029/145] [ie/youtube] Extract upload timestamp if available (#9856) Closes #4962, Closes #9829 Authored by: coletdjnz --- README.md | 1 + test/test_utils.py | 7 ++ yt_dlp/extractor/youtube.py | 134 +++++++++++++++++++----------------- yt_dlp/options.py | 2 +- yt_dlp/utils/_utils.py | 19 ++--- 5 files changed, 91 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index 5965d600ed09..1b4071132ec4 100644 --- a/README.md +++ b/README.md @@ -2333,6 +2333,7 @@ These options may no longer work as intended --write-annotations No supported site has annotations now --no-write-annotations Default --compat-options seperate-video-versions No longer needed + --compat-options no-youtube-prefer-utc-upload-date No longer supported #### Removed These options were deprecated since 2014 and have now been entirely removed diff --git a/test/test_utils.py b/test/test_utils.py index 816cf03f6b6a..77fadbbeab37 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -5,6 +5,7 @@ import sys import unittest import warnings +import datetime as dt sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -27,6 +28,7 @@ ExtractorError, InAdvancePagedList, LazyList, + NO_DEFAULT, OnDemandPagedList, Popen, age_restricted, @@ -768,6 +770,11 @@ def test_encode_compat_str(self): def test_parse_iso8601(self): self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=NO_DEFAULT), None) + # default does not override timezone in date_str + self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00', 
timezone=dt.timedelta(hours=-10)), 1395641066) self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e676c5cde24c..54da4e3622b6 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1325,6 +1325,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', + 'timestamp': 1349198244, } }, { @@ -1368,6 +1369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', + 'timestamp': 1349198244, }, 'params': { 'skip_download': True, @@ -1454,6 +1456,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1401991663, }, }, { @@ -1513,6 +1516,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', 'uploader_id': '@ProjektMelody', + 'timestamp': 1577508724, }, }, { @@ -1618,6 +1622,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@Olympics', 'uploader_id': '@Olympics', 'channel_is_verified': True, + 'timestamp': 1440707674, }, 'params': { 'skip_download': 'requires avconv', @@ -1651,6 +1656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': '孫ᄋᄅ', 'uploader_url': 'https://www.youtube.com/@AllenMeow', 'uploader_id': '@AllenMeow', + 'timestamp': 1299776999, }, }, # url_encoded_fmt_stream_map is empty string @@ -1794,6 +1800,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }], 'params': {'skip_download': True}, + 'skip': 'Not multifeed anymore', }, { # Multifeed video with comma in title 
(see https://github.com/ytdl-org/youtube-dl/issues/8536) @@ -1902,6 +1909,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'The Berkman Klein Center for Internet & Society', 'uploader_id': '@BKCHarvard', 'uploader_url': 'https://www.youtube.com/@BKCHarvard', + 'timestamp': 1422422076, }, 'params': { 'skip_download': True, @@ -1937,6 +1945,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@BernieSanders', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1447987198, }, 'params': { 'skip_download': True, @@ -2000,6 +2009,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Vsauce', 'comment_count': int, 'channel_is_verified': True, + 'timestamp': 1484761047, }, 'params': { 'skip_download': True, @@ -2155,6 +2165,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'l\'Or Vert asbl', 'uploader_url': 'https://www.youtube.com/@ElevageOrVert', 'uploader_id': '@ElevageOrVert', + 'timestamp': 1497343210, }, 'params': { 'skip_download': True, @@ -2193,6 +2204,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Csharp-video-tutorialsBlogspot', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1377976349, }, 'params': { 'skip_download': True, @@ -2275,6 +2287,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@CBSMornings', 'comment_count': int, 'channel_is_verified': True, + 'timestamp': 1405513526, } }, { @@ -2292,7 +2305,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'channel': 'Walk around Japan', 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'], - 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp', + 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg', 'age_limit': 0, 'availability': 'public', 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', @@ -2302,6 +2315,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Walk around Japan', 
'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124', 'uploader_id': '@walkaroundjapan7124', + 'timestamp': 1605884416, }, 'params': { 'skip_download': True, @@ -2397,6 +2411,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1395685455, }, 'params': {'format': 'mhtml', 'skip_download': True} }, { # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) @@ -2426,37 +2441,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@LeonNguyen', 'uploader_id': '@LeonNguyen', 'heatmap': 'count:100', + 'timestamp': 1641170939, } - }, { - # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date - 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', - 'info_dict': { - 'id': '2NUZ8W2llS4', - 'ext': 'mp4', - 'title': 'The NP that test your phone performance 🙂', - 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d', - 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA', - 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA', - 'duration': 21, - 'view_count': int, - 'age_limit': 0, - 'categories': ['Gaming'], - 'tags': 'count:23', - 'playable_in_embed': True, - 'live_status': 'not_live', - 'upload_date': '20220102', - 'like_count': int, - 'availability': 'public', - 'channel': 'Leon Nguyen', - 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp', - 'comment_count': int, - 'channel_follower_count': int, - 'uploader': 'Leon Nguyen', - 'uploader_url': 'https://www.youtube.com/@LeonNguyen', - 'uploader_id': '@LeonNguyen', - 'heatmap': 'count:100', - }, - 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']} }, { # date text is premiered video, ensure upload date in UTC (published 1641172509) 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM', @@ -2488,38 +2474,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 
'heatmap': 'count:100', + 'timestamp': 1641172509, } }, - { # continuous livestream. Microformat upload date should be preferred. - # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27 - 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU', + { # continuous livestream. + # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00 + 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk', 'info_dict': { - 'id': 'kgx4WGK0oNU', - 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'id': 'jfKfPfyJRdk', 'ext': 'mp4', - 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA', - 'availability': 'public', + 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow', + 'like_count': int, + 'uploader': 'Lofi Girl', + 'categories': ['Music'], + 'concurrent_view_count': int, + 'playable_in_embed': True, + 'timestamp': 1657627949, + 'release_date': '20220712', + 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow', + 'description': 'md5:13a6f76df898f5674f9127139f3df6f7', 'age_limit': 0, - 'release_timestamp': 1637975704, - 'upload_date': '20210619', - 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA', - 'live_status': 'is_live', - 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg', - 'channel': 'Abao in Tokyo', + 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg', + 'release_timestamp': 1657641570, + 'uploader_url': 'https://www.youtube.com/@LofiGirl', 'channel_follower_count': int, - 'release_date': '20211127', - 'tags': 'count:39', - 'categories': ['People & Blogs'], - 'like_count': int, + 'channel_is_verified': True, + 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to', 'view_count': int, - 'playable_in_embed': True, - 'description': 'md5:2ef1d002cad520f65825346e2084e49d', - 'concurrent_view_count': int, - 'uploader': 'Abao in Tokyo', - 'uploader_url': 'https://www.youtube.com/@abaointokyo', - 
'uploader_id': '@abaointokyo', + 'live_status': 'is_live', + 'tags': 'count:32', + 'channel': 'Lofi Girl', + 'availability': 'public', + 'upload_date': '20220712', + 'uploader_id': '@LofiGirl', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA', 'info_dict': { @@ -2545,6 +2534,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@lesmiscore', 'uploader': 'Lesmiscore', 'uploader_url': 'https://www.youtube.com/@lesmiscore', + 'timestamp': 1648005313, } }, { # Prefer primary title+description language metadata by default @@ -2572,6 +2562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1662677394, }, 'params': {'skip_download': True} }, { @@ -2585,7 +2576,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 5, 'live_status': 'not_live', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', - 'upload_date': '20220728', + 'upload_date': '20220729', 'view_count': int, 'categories': ['People & Blogs'], 'thumbnail': r're:^https?://.*\.jpg', @@ -2598,6 +2589,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1659073275, + 'like_count': int, }, 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}}, 'expected_warnings': [r'Preferring "fr" translated fields'], @@ -2663,6 +2656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_id': '@ProjektMelody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', + 'timestamp': 1577508724, }, 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, }, @@ -2697,6 +2691,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@sana_natori', 'channel_is_verified': True, 'heatmap': 
'count:100', + 'timestamp': 1671798112, }, }, { @@ -2766,6 +2761,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries', 'uploader_id': '@ChristopherSykesDocumentaries', 'heatmap': 'count:100', + 'timestamp': 1211825920, }, 'params': { 'skip_download': True, @@ -4622,19 +4618,31 @@ def process_language(container, base_url, lang_code, sub_name, query): 'uploader_id': channel_handle, 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None), }) + + # We only want timestamp IF it has time precision AND a timezone + # Currently the uploadDate in microformats appears to be in US/Pacific timezone. + timestamp = ( + parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT) + or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT) + ) + upload_date = ( + dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else + ( + unified_strdate(get_first(microformats, 'uploadDate')) + or unified_strdate(search_meta('uploadDate')) + )) + + # In the case we cannot get the timestamp: # The upload date for scheduled, live and past live streams / premieres in microformats # may be different from the stream date. Although not in UTC, we will prefer it in this case. 
# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139 - upload_date = ( - unified_strdate(get_first(microformats, 'uploadDate')) - or unified_strdate(search_meta('uploadDate'))) - if not upload_date or ( - live_status in ('not_live', None) - and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', []) - ): + if not upload_date or (not timestamp and live_status in ('not_live', None)): + # this should be in UTC, as configured in the cookie/client context upload_date = strftime_or_none( self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date + info['upload_date'] = upload_date + info['timestamp'] = timestamp if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'): # Newly uploaded videos' HLS formats are potentially problematic and need to be checked diff --git a/yt_dlp/options.py b/yt_dlp/options.py index faa1ee563492..997b575cd46a 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -478,7 +478,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): }, 'aliases': { 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], - '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], + '2021': ['2022', 'no-certifi', 'filename-sanitization'], '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'], '2023': [], } diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index b637669124f6..5f458ea45474 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1134,7 +1134,7 @@ def is_path_like(f): return isinstance(f, (str, bytes, os.PathLike)) -def extract_timezone(date_str): +def extract_timezone(date_str, default=None): m = re.search( r'''(?x) ^.{8,}? 
# >=8 char non-TZ prefix, if present @@ -1146,21 +1146,25 @@ def extract_timezone(date_str): (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm $) ''', date_str) + timezone = None + if not m: m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str) timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip()) if timezone is not None: date_str = date_str[:-len(m.group('tz'))] - timezone = dt.timedelta(hours=timezone or 0) + timezone = dt.timedelta(hours=timezone) else: date_str = date_str[:-len(m.group('tz'))] - if not m.group('sign'): - timezone = dt.timedelta() - else: + if m.group('sign'): sign = 1 if m.group('sign') == '+' else -1 timezone = dt.timedelta( hours=sign * int(m.group('hours')), minutes=sign * int(m.group('minutes'))) + + if timezone is None and default is not NO_DEFAULT: + timezone = default or dt.timedelta() + return timezone, date_str @@ -1172,10 +1176,9 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): date_str = re.sub(r'\.[0-9]+', '', date_str) - if timezone is None: - timezone, date_str = extract_timezone(date_str) + timezone, date_str = extract_timezone(date_str, timezone) - with contextlib.suppress(ValueError): + with contextlib.suppress(ValueError, TypeError): date_format = f'%Y-%m-%d{delimiter}%H:%M:%S' dt_ = dt.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt_.timetuple()) From 347f13dd9bccc2b4db3ea25689410d45d8370ed4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 26 May 2024 16:16:36 -0500 Subject: [PATCH 030/145] [ie/tiktok:user] Fix extractor (#9661) Closes #3776, Closes #4996 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 348 ++++++++++++++++++++----------------- 1 file changed, 189 insertions(+), 159 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 7772dd1f281f..4113660a5821 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -3,6 +3,7 @@ import json import 
random import re +import string import time import uuid @@ -11,7 +12,6 @@ from ..networking import HEADRequest from ..utils import ( ExtractorError, - LazyList, UnsupportedError, UserNotLive, determine_ext, @@ -236,7 +236,7 @@ def _extract_web_data_and_status(self, url, video_id, fatal=True): return video_data, status - def _get_subtitles(self, aweme_detail, aweme_id, user_url): + def _get_subtitles(self, aweme_detail, aweme_id, user_name): # TODO: Extract text positioning info subtitles = {} # aweme/detail endpoint subs @@ -267,9 +267,9 @@ def _get_subtitles(self, aweme_detail, aweme_id, user_url): }) # webpage subs if not subtitles: - if user_url: # only _parse_aweme_video_app needs to extract the webpage here + if user_name: # only _parse_aweme_video_app needs to extract the webpage here aweme_detail, _ = self._extract_web_data_and_status( - f'{user_url}/video/{aweme_id}', aweme_id, fatal=False) + self._create_url(user_name, aweme_id), aweme_id, fatal=False) for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])): subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({ 'ext': remove_start(caption.get('Format'), 'web'), @@ -394,11 +394,7 @@ def extract_addr(addr, add_meta={}): }) stats_info = aweme_detail.get('statistics') or {} - author_info = aweme_detail.get('author') or {} music_info = aweme_detail.get('music') or {} - user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info, - 'sec_uid', 'id', 'uid', 'unique_id', - expected_type=str_or_none, get_all=False)) labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str) contained_music_track = traverse_obj( @@ -412,6 +408,13 @@ def extract_addr(addr, add_meta={}): else: music_track, music_author = music_info.get('title'), traverse_obj(music_info, ('author', {str})) + author_info = traverse_obj(aweme_detail, ('author', { + 'uploader': ('unique_id', {str}), + 'uploader_id': ('uid', {str_or_none}), + 'channel': ('nickname', 
{str}), + 'channel_id': ('sec_uid', {str}), + })) + return { 'id': aweme_id, **traverse_obj(aweme_detail, { @@ -425,21 +428,20 @@ def extract_addr(addr, add_meta={}): 'repost_count': 'share_count', 'comment_count': 'comment_count', }, expected_type=int_or_none), - **traverse_obj(author_info, { - 'uploader': ('unique_id', {str}), - 'uploader_id': ('uid', {str_or_none}), - 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat - 'channel': ('nickname', {str}), - 'channel_id': ('sec_uid', {str}), - }), - 'uploader_url': user_url, + **author_info, + 'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None), + 'uploader_url': format_field( + author_info, ['uploader', 'uploader_id'], self._UPLOADER_URL_FORMAT, default=None), 'track': music_track, 'album': str_or_none(music_info.get('album')) or None, 'artists': re.split(r'(?:, | & )', music_author) if music_author else None, 'formats': formats, - 'subtitles': self.extract_subtitles(aweme_detail, aweme_id, user_url), + 'subtitles': self.extract_subtitles( + aweme_detail, aweme_id, traverse_obj(author_info, 'uploader', 'uploader_id', 'channel_id')), 'thumbnails': thumbnails, - 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000), + 'duration': (traverse_obj(video_info, ( + (None, 'download_addr'), 'duration', {functools.partial(int_or_none, scale=1000)}, any)) + or traverse_obj(music_info, ('duration', {int_or_none}))), 'availability': self._availability( is_private='Private' in labels, needs_subscription='Friends only' in labels, @@ -447,23 +449,17 @@ def extract_addr(addr, add_meta={}): '_format_sort_fields': ('quality', 'codec', 'size', 'br'), } - def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): - video_info = aweme_detail['video'] - author_info = traverse_obj(aweme_detail, 'authorInfo', 'author', expected_type=dict, default={}) - music_info = aweme_detail.get('music') or {} - stats_info 
= aweme_detail.get('stats') or {} - channel_id = traverse_obj(author_info or aweme_detail, (('authorSecId', 'secUid'), {str}), get_all=False) - user_url = self._UPLOADER_URL_FORMAT % channel_id if channel_id else None - - formats = [] - width = int_or_none(video_info.get('width')) - height = int_or_none(video_info.get('height')) - ratio = try_call(lambda: width / height) or 0.5625 + def _extract_web_formats(self, aweme_detail): COMMON_FORMAT_INFO = { 'ext': 'mp4', 'vcodec': 'h264', 'acodec': 'aac', } + video_info = traverse_obj(aweme_detail, ('video', {dict})) or {} + play_width = int_or_none(video_info.get('width')) + play_height = int_or_none(video_info.get('height')) + ratio = try_call(lambda: play_width / play_height) or 0.5625 + formats = [] for bitrate_info in traverse_obj(video_info, ('bitrateInfo', lambda _, v: v['PlayAddr']['UrlList'])): format_info, res = self._parse_url_key( @@ -488,7 +484,7 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): else: # landscape: res/dimension is height x = int(dimension * ratio) format_info.update({ - 'width': x - (x % 2), + 'width': x + (x % 2), 'height': dimension, }) @@ -500,15 +496,15 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): }) # We don't have res string for play formats, but need quality for sorting & de-duplication - play_quality = traverse_obj(formats, (lambda _, v: v['width'] == width, 'quality', any)) + play_quality = traverse_obj(formats, (lambda _, v: v['width'] == play_width, 'quality', any)) for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})): formats.append({ **COMMON_FORMAT_INFO, 'format_id': 'play', 'url': self._proto_relative_url(play_url), - 'width': width, - 'height': height, + 'width': play_width, + 'height': play_height, 'quality': play_quality, }) @@ -528,8 +524,8 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): }) # Is it a slideshow with only audio for download? 
- if not formats and traverse_obj(music_info, ('playUrl', {url_or_none})): - audio_url = music_info['playUrl'] + if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})): + audio_url = aweme_detail['music']['playUrl'] ext = traverse_obj(parse_qs(audio_url), ( 'mime_type', -1, {lambda x: x.replace('_', '/')}, {mimetype2ext})) or 'm4a' formats.append({ @@ -540,23 +536,31 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): 'vcodec': 'none', }) - thumbnails = [] - for thumb_url in traverse_obj(aweme_detail, ( - (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})): - thumbnails.append({ - 'url': self._proto_relative_url(thumb_url), - 'width': width, - 'height': height, - }) + return formats + + def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False): + author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), { + 'channel': ('nickname', {str}), + 'channel_id': (('authorSecId', 'secUid'), {str}), + 'uploader': (('uniqueId', 'author'), {str}), + 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}), + }), get_all=False) return { 'id': video_id, - **traverse_obj(music_info, { + 'formats': None if extract_flat else self._extract_web_formats(aweme_detail), + 'subtitles': None if extract_flat else self.extract_subtitles(aweme_detail, video_id, None), + 'http_headers': {'Referer': webpage_url}, + **author_info, + 'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None), + 'uploader_url': format_field( + author_info, ['uploader', 'uploader_id'], self._UPLOADER_URL_FORMAT, default=None), + **traverse_obj(aweme_detail, ('music', { 'track': ('title', {str}), 'album': ('album', {str}, {lambda x: x or None}), - 'artists': ('authorName', {str}, {lambda x: [x] if x else None}), + 'artists': ('authorName', {str}, {lambda x: re.split(r'(?:, | & )', x) if x else None}), 'duration': ('duration', {int_or_none}), - 
}), + })), **traverse_obj(aweme_detail, { 'title': ('desc', {str}), 'description': ('desc', {str}), @@ -564,26 +568,17 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): 'duration': ('video', 'duration', {int_or_none}, {lambda x: x or None}), 'timestamp': ('createTime', {int_or_none}), }), - **traverse_obj(author_info or aweme_detail, { - 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat - 'channel': ('nickname', {str}), - 'uploader': (('uniqueId', 'author'), {str}), - 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}), - }, get_all=False), - **traverse_obj(stats_info, { + **traverse_obj(aweme_detail, ('stats', { 'view_count': 'playCount', 'like_count': 'diggCount', 'repost_count': 'shareCount', 'comment_count': 'commentCount', - }, expected_type=int_or_none), - 'channel_id': channel_id, - 'uploader_url': user_url, - 'formats': formats, - 'subtitles': self.extract_subtitles(aweme_detail, video_id, None), - 'thumbnails': thumbnails, - 'http_headers': { - 'Referer': webpage_url, - } + }), expected_type=int_or_none), + 'thumbnails': traverse_obj(aweme_detail, ( + (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), { + 'url': ({url_or_none}, {self._proto_relative_url}), + }, + )), } @@ -620,21 +615,21 @@ class TikTokIE(TikTokBaseIE): 'skip': '404 Not Found', }, { 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en', - 'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b', + 'md5': 'f21112672ee4ce05ca390fb6522e1b6f', 'info_dict': { 'id': '6742501081818877190', 'ext': 'mp4', 'title': 'md5:5e2a23877420bb85ce6521dbee39ba94', 'description': 'md5:5e2a23877420bb85ce6521dbee39ba94', 'duration': 27, - 'height': 960, - 'width': 540, + 'height': 1024, + 'width': 576, 'uploader': 'patrox', 'uploader_id': '18702747', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', + 'uploader_url': 'https://www.tiktok.com/@patrox', + 
'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', 'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', 'channel': 'patroX', - 'creators': ['patroX'], 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'upload_date': '20190930', 'timestamp': 1569860870, @@ -646,7 +641,7 @@ class TikTokIE(TikTokBaseIE): 'track': 'Big Fun', }, }, { - # Banned audio, only available on the app + # Banned audio, was available on the app, now works with web too 'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402', 'info_dict': { 'id': '6984138651336838402', @@ -655,9 +650,9 @@ class TikTokIE(TikTokBaseIE): 'description': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥', 'uploader': 'barudakhb_', 'channel': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6', - 'creators': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'], 'uploader_id': '6974687867511718913', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', + 'uploader_url': 'https://www.tiktok.com/@barudakhb_', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', 'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', 'track': 'Boka Dance', 'artists': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'], @@ -680,7 +675,6 @@ class TikTokIE(TikTokBaseIE): 'description': 'Slap and Run!', 'uploader': 'user440922249', 'channel': 'Slap And Run', - 'creators': ['Slap And Run'], 'uploader_id': '7036055384943690754', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_', 'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_', @@ -694,7 +688,7 @@ class TikTokIE(TikTokBaseIE): 'repost_count': int, 'comment_count': int, }, - 'params': {'skip_download': True}, # XXX: unable to download video data: HTTP Error 403: 
Forbidden + 'skip': 'This video is unavailable', }, { # Video without title and description 'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694', @@ -705,9 +699,9 @@ class TikTokIE(TikTokBaseIE): 'description': '', 'uploader': 'pokemonlife22', 'channel': 'Pokemon', - 'creators': ['Pokemon'], 'uploader_id': '6820838815978423302', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', + 'uploader_url': 'https://www.tiktok.com/@pokemonlife22', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'track': 'original sound', 'timestamp': 1643714123, @@ -752,13 +746,14 @@ class TikTokIE(TikTokBaseIE): 'title': 'TikTok video #7139980461132074283', 'description': '', 'channel': 'Antaura', - 'creators': ['Antaura'], 'uploader': '_le_cannibale_', 'uploader_id': '6604511138619654149', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', + 'uploader_url': 'https://www.tiktok.com/@_le_cannibale_', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', 'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', 'artists': ['nathan !'], 'track': 'grahamscott canon', + 'duration': 10, 'upload_date': '20220905', 'timestamp': 1662406249, 'view_count': int, @@ -769,18 +764,18 @@ class TikTokIE(TikTokBaseIE): }, }, { # only available via web - 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', # FIXME - 'md5': '6aba7fad816e8709ff2c149679ace165', + 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', + 'md5': '4cdefa501ac8ac20bf04986e10916fea', 'info_dict': { 'id': '7206382937372134662', 'ext': 'mp4', 'title': 
'md5:1d95c0b96560ca0e8a231af4172b2c0a', 'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a', 'channel': 'MoxyPatch', - 'creators': ['MoxyPatch'], 'uploader': 'moxypatch', 'uploader_id': '7039142049363379205', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V', + 'uploader_url': 'https://www.tiktok.com/@moxypatch', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V', 'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V', 'artists': ['your worst nightmare'], 'track': 'original sound', @@ -809,7 +804,6 @@ class TikTokIE(TikTokBaseIE): 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd', 'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd', 'channel': 'tate mcrae', - 'creators': ['tate mcrae'], 'artists': ['tate mcrae'], 'track': 'original sound', 'upload_date': '20220609', @@ -821,7 +815,7 @@ class TikTokIE(TikTokBaseIE): 'comment_count': int, 'thumbnail': r're:^https://.+\.webp', }, - 'skip': 'Unavailable via feed API, no formats available via web', + 'skip': 'Unavailable via feed API, only audio available via web', }, { # Slideshow, audio-only m4a format 'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594', @@ -833,13 +827,14 @@ class TikTokIE(TikTokBaseIE): 'description': 'я ред флаг простите #переписка #щитпост #тревожныйтиппривязанности #рекомендации ', 'uploader': 'hara_yoimiya', 'uploader_id': '6582536342634676230', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', + 'uploader_url': 'https://www.tiktok.com/@hara_yoimiya', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', 'channel_id': 
'MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', - 'channel': 'лампочка', - 'creators': ['лампочка'], + 'channel': 'лампочка(!)', 'artists': ['Øneheart'], 'album': 'watching the stars', 'track': 'watching the stars', + 'duration': 60, 'upload_date': '20230708', 'timestamp': 1688816612, 'view_count': int, @@ -876,102 +871,141 @@ def _real_extract(self, url): class TikTokUserIE(TikTokBaseIE): IE_NAME = 'tiktok:user' - _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])' - _WORKING = False + _VALID_URL = r'(?:tiktokuser:|https?://(?:www\.)?tiktok\.com/@)(?P<id>[\w.-]+)/?(?:$|[#?])' _TESTS = [{ 'url': 'https://tiktok.com/@corgibobaa?lang=en', 'playlist_mincount': 45, 'info_dict': { - 'id': '6935371178089399301', + 'id': 'MS4wLjABAAAAepiJKgwWhulvCpSuUVsp7sgVVsFJbbNaLeQ6OQ0oAJERGDUIXhb2yxxHZedsItgT', 'title': 'corgibobaa', - 'thumbnail': r're:https://.+_1080x1080\.webp' }, - 'expected_warnings': ['Retrying'] }, { 'url': 'https://www.tiktok.com/@6820838815978423302', 'playlist_mincount': 5, 'info_dict': { - 'id': '6820838815978423302', + 'id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'title': '6820838815978423302', - 'thumbnail': r're:https://.+_1080x1080\.webp' }, - 'expected_warnings': ['Retrying'] }, { 'url': 'https://www.tiktok.com/@meme', 'playlist_mincount': 593, 'info_dict': { - 'id': '79005827461758976', + 'id': 'MS4wLjABAAAAiKfaDWeCsT3IHwY77zqWGtVRIy9v4ws1HbVi7auP1Vx7dJysU_hc5yRiGywojRD6', 'title': 'meme', - 'thumbnail': r're:https://.+_1080x1080\.webp' }, - 'expected_warnings': ['Retrying'] + }, { + 'url': 'tiktokuser:MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ', + 'playlist_mincount': 31, + 'info_dict': { + 'id': 'MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ', + }, }] + _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0' + _API_BASE_URL = 
'https://www.tiktok.com/api/creator/item_list/' - r''' # TODO: Fix by adding _signature to api_url - def _entries(self, webpage, user_id, username): - secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, username) - verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id') - if not verifyfp_cookie: - raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True) - api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor=' - cursor = '0' - for page in itertools.count(): - data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page) - for video in data_json.get('itemList', []): - video_id = video['id'] - video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}' - yield self._url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc'))) - if not data_json.get('hasMore'): - break - cursor = data_json['cursor'] - ''' - - def _video_entries_api(self, webpage, user_id, username): - query = { - 'user_id': user_id, - 'count': 21, - 'max_cursor': 0, - 'min_cursor': 0, - 'retry_type': 'no_retry', - 'device_id': self._DEVICE_ID, # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. 
+ def _build_web_query(self, sec_uid, cursor): + return { + 'aid': '1988', + 'app_language': 'en', + 'app_name': 'tiktok_web', + 'browser_language': 'en-US', + 'browser_name': 'Mozilla', + 'browser_online': 'true', + 'browser_platform': 'Win32', + 'browser_version': '5.0 (Windows)', + 'channel': 'tiktok_web', + 'cookie_enabled': 'true', + 'count': '15', + 'cursor': cursor, + 'device_id': self._DEVICE_ID, + 'device_platform': 'web_pc', + 'focus_state': 'true', + 'from_page': 'user', + 'history_len': '2', + 'is_fullscreen': 'false', + 'is_page_visible': 'true', + 'language': 'en', + 'os': 'windows', + 'priority_region': '', + 'referer': '', + 'region': 'US', + 'screen_height': '1080', + 'screen_width': '1920', + 'secUid': sec_uid, + 'type': '1', # pagination type: 0 == oldest-to-newest, 1 == newest-to-oldest + 'tz_name': 'UTC', + 'verifyFp': f'verify_{"".join(random.choices(string.hexdigits, k=7))}', + 'webcast_language': 'en', } + def _entries(self, sec_uid, user_name): + display_id = user_name or sec_uid + + cursor = int(time.time() * 1E3) for page in itertools.count(1): - for retry in self.RetryManager(): - try: - post_list = self._call_api( - 'aweme/post', query, username, note=f'Downloading user video list page {page}', - errnote='Unable to download user video list') - except ExtractorError as e: - if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: - retry.error = e - continue - raise - yield from post_list.get('aweme_list', []) - if not post_list.get('has_more'): + response = self._download_json( + self._API_BASE_URL, display_id, f'Downloading page {page}', + query=self._build_web_query(sec_uid, cursor), headers={'User-Agent': self._USER_AGENT}) + + for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): + video_id = video['id'] + webpage_url = self._create_url(display_id, video_id) + yield self.url_result( + webpage_url, TikTokIE, + **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True)) + + old_cursor = 
cursor + cursor = traverse_obj( + response, ('itemList', -1, 'createTime', {functools.partial(int_or_none, invscale=1E3)})) + if not cursor: + # User may not have posted within this ~1 week lookback, so manually adjust cursor + cursor = old_cursor - 7 * 86_400_000 + # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed + if cursor < 1472706000000 or not traverse_obj(response, 'hasMorePrevious'): break - query['max_cursor'] = post_list['max_cursor'] - - def _entries_api(self, user_id, videos): - for video in videos: - yield { - **self._parse_aweme_video_app(video), - 'extractor_key': TikTokIE.ie_key(), - 'extractor': 'TikTok', - 'webpage_url': f'https://tiktok.com/@{user_id}/video/{video["aweme_id"]}', - } - def _real_extract(self, url): - user_name = self._match_id(url) - webpage = self._download_webpage(url, user_name, headers={ - 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' - }) - user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID', default=None) or user_name + def _get_sec_uid(self, user_url, user_name, msg): + webpage = self._download_webpage( + user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'}, + note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage') or '' + return (traverse_obj(self._get_universal_data(webpage, user_name), + ('webapp.user-detail', 'userInfo', 'user', 'secUid', {str})) + or traverse_obj(self._get_sigi_state(webpage, user_name), + ('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}), + ('UserModule', 'users', ..., 'secUid', {str}, any))) - videos = LazyList(self._video_entries_api(webpage, user_id, user_name)) - thumbnail = traverse_obj(videos, (0, 'author', 'avatar_larger', 'url_list', 0)) + def _real_extract(self, url): + user_name, sec_uid = self._match_id(url), None + if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name): + user_name, sec_uid = None, mobj.group(0) + 
else: + sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user') + or self._get_sec_uid(self._UPLOADER_URL_FORMAT % f'{user_name}/live', user_name, 'live')) + + if not sec_uid: + webpage = self._download_webpage( + f'https://www.tiktok.com/embed/@{user_name}', user_name, + note='Downloading user embed page', fatal=False) or '' + data = traverse_obj(self._search_json( + r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>', + webpage, 'data', user_name, default={}), + ('source', 'data', f'/embed/@{user_name}', {dict})) + + for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})): + webpage_url = self._create_url(user_name, aweme_id) + video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False) + sec_uid = self._parse_aweme_video_web( + video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id') + if sec_uid: + break + + if not sec_uid: + raise ExtractorError( + 'Unable to extract secondary user ID. If you are able to get the channel_id ' + 'from a video posted by this user, try using "tiktokuser:channel_id" as the ' + 'input URL (replacing `channel_id` with its actual value)', expected=True) - return self.playlist_result(self._entries_api(user_id, videos), user_id, user_name, thumbnail=thumbnail) + return self.playlist_result(self._entries(sec_uid, user_name), sec_uid, user_name) class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor @@ -1098,7 +1132,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel': '杨超越', - 'creators': ['杨超越'], 'duration': 19, 'timestamp': 1620905839, 'upload_date': '20210513', @@ -1123,7 +1156,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', 
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', 'channel': '杨超越工作室', - 'creators': ['杨超越工作室'], 'duration': 42, 'timestamp': 1625739481, 'upload_date': '20210708', @@ -1148,7 +1180,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel': '杨超越', - 'creators': ['杨超越'], 'duration': 17, 'timestamp': 1619098692, 'upload_date': '20210422', @@ -1190,7 +1221,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel': '杨超越', - 'creators': ['杨超越'], 'duration': 15, 'timestamp': 1621261163, 'upload_date': '20210517', From 119d41f27061d220d276a2d38cfc8d873437452a Mon Sep 17 00:00:00 2001 From: imanoreotwe <4606611+imanoreotwe@users.noreply.github.com> Date: Sun, 26 May 2024 15:26:30 -0600 Subject: [PATCH 031/145] [ie/tiktok:collection] Add extractor (#9986) Closes #9984 Authored by: imanoreotwe, bashonly --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/tiktok.py | 58 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 37e6fc318e4c..e9cd38a65188 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2074,6 +2074,7 @@ ) from .tiktok import ( DouyinIE, + TikTokCollectionIE, TikTokEffectIE, TikTokIE, TikTokLiveIE, diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 4113660a5821..ab8efc19ed66 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1117,6 +1117,64 @@ def _real_extract(self, url): return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id) +class TikTokCollectionIE(TikTokBaseIE): + IE_NAME = 'tiktok:collection' + 
_VALID_URL = r'https?://www\.tiktok\.com/@(?P<user_id>[\w.-]+)/collection/(?P<title>[^/?#]+)-(?P<id>\d+)/?(?:[?#]|$)' + _TESTS = [{ + # playlist should have exactly 9 videos + 'url': 'https://www.tiktok.com/@imanoreotwe/collection/count-test-7371330159376370462', + 'info_dict': { + 'id': '7371330159376370462', + 'title': 'imanoreotwe-count-test' + }, + 'playlist_count': 9 + }, { + # tests returning multiple pages of a large collection + 'url': 'https://www.tiktok.com/@imanoreotwe/collection/%F0%9F%98%82-7111887189571160875', + 'info_dict': { + 'id': '7111887189571160875', + 'title': 'imanoreotwe-%F0%9F%98%82' + }, + 'playlist_mincount': 100 + }] + _API_BASE_URL = 'https://www.tiktok.com/api/collection/item_list/' + _PAGE_COUNT = 30 + + def _build_web_query(self, collection_id, cursor): + return { + 'aid': '1988', + 'collectionId': collection_id, + 'count': self._PAGE_COUNT, + 'cursor': cursor, + 'sourceType': '113', + } + + def _entries(self, collection_id): + cursor = 0 + for page in itertools.count(1): + response = self._download_json( + self._API_BASE_URL, collection_id, f'Downloading page {page}', + query=self._build_web_query(collection_id, cursor)) + + for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): + video_id = video['id'] + author = traverse_obj(video, ('author', ('uniqueId', 'secUid', 'id'), {str}, any)) or '_' + webpage_url = self._create_url(author, video_id) + yield self.url_result( + webpage_url, TikTokIE, + **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True)) + + if not traverse_obj(response, 'hasMore'): + break + cursor += self._PAGE_COUNT + + def _real_extract(self, url): + collection_id, title, user_name = self._match_valid_url(url).group('id', 'title', 'user_id') + + return self.playlist_result( + self._entries(collection_id), collection_id, '-'.join((user_name, title))) + + class DouyinIE(TikTokBaseIE): _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)' _TESTS = [{ From 
5a2eebc76770fca91ffabeff658d560f716fec80 Mon Sep 17 00:00:00 2001 From: ocococococ <104170215+ocococococ@users.noreply.github.com> Date: Sun, 26 May 2024 23:33:15 +0200 Subject: [PATCH 032/145] [ie/LCI] Fix extractor (#10025) Authored by: ocococococ --- yt_dlp/extractor/lci.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py index e7d2f8a24c2e..708cb548d865 100644 --- a/yt_dlp/extractor/lci.py +++ b/yt_dlp/extractor/lci.py @@ -1,9 +1,25 @@ from .common import InfoExtractor +from .wat import WatIE +from ..utils import ExtractorError, int_or_none +from ..utils.traversal import traverse_obj class LCIIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P<id>\d+)\.html' _TESTS = [{ + 'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html', + 'info_dict': { + 'id': '14113788', + 'ext': 'mp4', + 'title': '24H Pujadas du vendredi 24 mai 2024', + 'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg', + 'upload_date': '20240524', + 'duration': 6158, + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html', 'info_dict': { 'id': '13875948', @@ -24,5 +40,10 @@ class LCIIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id') - return self.url_result('wat:' + wat_id, 'Wat', wat_id) + next_data = self._search_nextjs_data(webpage, video_id) + wat_id = traverse_obj(next_data, ( + 'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, 
any)) + if wat_id is None: + raise ExtractorError('Could not find wat_id') + + return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id)) From 5c019f6328ad40d66561eac3c4de0b3cd070d0f6 Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Sun, 26 May 2024 23:37:49 +0200 Subject: [PATCH 033/145] [misc] Cleanup (#9765) Closes #9763 Authored by: bashonly, seproDev, Grub4K Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- Makefile | 4 ++-- README.md | 7 +++--- devscripts/changelog_override.json | 16 +++++++++++++ devscripts/run_tests.bat | 4 ---- devscripts/run_tests.sh | 4 ---- pyinst.py | 17 -------------- setup.py | 36 ------------------------------ test/test_InfoExtractor.py | 2 +- yt_dlp/extractor/ceskatelevize.py | 2 +- yt_dlp/extractor/common.py | 2 +- yt_dlp/extractor/thisvid.py | 2 +- yt_dlp/extractor/vk.py | 4 ++-- yt_dlp/utils/_utils.py | 2 +- 13 files changed, 28 insertions(+), 74 deletions(-) delete mode 100644 devscripts/run_tests.bat delete mode 100755 devscripts/run_tests.sh delete mode 100755 pyinst.py delete mode 100755 setup.py diff --git a/Makefile b/Makefile index b8f010086151..e1de7f3e91a7 100644 --- a/Makefile +++ b/Makefile @@ -74,11 +74,11 @@ codetest: autopep8 --diff . 
test: - $(PYTHON) -m pytest + $(PYTHON) -m pytest -Werror $(MAKE) codetest offlinetest: codetest - $(PYTHON) -m pytest -k "not download" + $(PYTHON) -m pytest -Werror -m "not download" CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort CODE_FOLDERS != $(CODE_FOLDERS_CMD) diff --git a/README.md b/README.md index 1b4071132ec4..52c80f26efc3 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,6 @@ File|Description [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows)) [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary -[yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update) [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) @@ -170,7 +169,7 @@ Example usage: yt-dlp --update-to nightly # To install nightly with pip: -python3 -m pip install -U --pre yt-dlp[default] +python3 -m pip install -U --pre "yt-dlp[default]" ``` ## DEPENDENCIES @@ -202,7 +201,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. 
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE) - * Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]` + * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"` * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds @@ -1751,7 +1750,7 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;formats=incomplete" --extractor-args "funimation:version=uncut"` Note: In CLI, `ARG` can use `-` instead of `_`; e.g. 
`youtube:player-client"` becomes `youtube:player_client"` diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 046060cb257a..4be1e58d4327 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -147,5 +147,21 @@ "action": "add", "when": "9590cc6b4768e190183d7d071a6c78170889116a", "short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n - `utils.Popen` has been patched accordingly." + }, + { + "action": "change", + "when": "41ba4a808b597a3afed78c89675a30deb6844450", + "short": "[ie/tiktok] Extract via mobile API only if extractor-arg is passed (#9938)", + "authors": ["bashonly"] + }, + { + "action": "remove", + "when": "6e36d17f404556f0e3a43f441c477a71a91877d9" + }, + { + "action": "change", + "when": "beaf832c7a9d57833f365ce18f6115b88071b296", + "short": "[ie/soundcloud] Add `formats` extractor-arg (#10004)", + "authors": ["bashonly", "Grub4K"] } ] diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat deleted file mode 100644 index 57b1f4bf4653..000000000000 --- a/devscripts/run_tests.bat +++ /dev/null @@ -1,4 +0,0 @@ -@echo off - ->&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead -python %~dp0run_tests.py %~1 diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh deleted file mode 100755 index 123ceb1ee4f9..000000000000 --- a/devscripts/run_tests.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env sh - ->&2 echo 'run_tests.sh is deprecated. 
Please use `devscripts/run_tests.py` instead' -python3 devscripts/run_tests.py "$1" diff --git a/pyinst.py b/pyinst.py deleted file mode 100755 index 4a8ed2d3497c..000000000000 --- a/pyinst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -import warnings - -from bundle.pyinstaller import main - -warnings.warn(DeprecationWarning('`pyinst.py` is deprecated and will be removed in a future version. ' - 'Use `bundle.pyinstaller` instead')) - -if __name__ == '__main__': - main() diff --git a/setup.py b/setup.py deleted file mode 100755 index 8d1e6d10b227..000000000000 --- a/setup.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -import warnings - - -if sys.argv[1:2] == ['py2exe']: - warnings.warn(DeprecationWarning('`setup.py py2exe` is deprecated and will be removed in a future version. ' - 'Use `bundle.py2exe` instead')) - - import bundle.py2exe - - bundle.py2exe.main() - -elif 'build_lazy_extractors' in sys.argv: - warnings.warn(DeprecationWarning('`setup.py build_lazy_extractors` is deprecated and will be removed in a future version. ' - 'Use `devscripts.make_lazy_extractors` instead')) - - import subprocess - - os.chdir(sys.path[0]) - print('running build_lazy_extractors') - subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py']) - -else: - - print( - 'ERROR: Building by calling `setup.py` is deprecated. ' - 'Use a build frontend like `build` instead. 
', - 'Refer to https://build.pypa.io for more info', file=sys.stderr) - sys.exit(1) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c633ce3e47ab..744587e45b0d 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1912,7 +1912,7 @@ def test_search_nextjs_data(self): self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {}) self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None) self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {}) - with self.assertRaises(DeprecationWarning): + with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 156b6a324764..5d6335729629 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -101,7 +101,7 @@ def _real_extract(self, url): site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') playlist_title = self._og_search_title(webpage, default=None) if site_name and playlist_title: - playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] + playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0] playlist_description = self._og_search_description(webpage, default=None) if playlist_description: playlist_description = playlist_description.replace('\xa0', ' ') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 38daad72efc6..b99b7e5ab29f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3517,7 +3517,7 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf rtmp_url_parts = re.split( - r'((?:mp4|mp3|flv):)', source_url, 1) + r'((?:mp4|mp3|flv):)', source_url, maxsplit=1) if len(rtmp_url_parts) == 3: rtmp_url, 
prefix, play_path = rtmp_url_parts a_format.update({ diff --git a/yt_dlp/extractor/thisvid.py b/yt_dlp/extractor/thisvid.py index 9d3368ed7529..04b0838116ce 100644 --- a/yt_dlp/extractor/thisvid.py +++ b/yt_dlp/extractor/thisvid.py @@ -134,7 +134,7 @@ def _make_playlist_result(self, url): title = re.split( r'(?i)\s*\|\s*ThisVid\.com\s*$', self._og_search_title(webpage, default=None) - or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None + or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', maxsplit=1)[0] or None return self.playlist_from_matches( self._generate_playlist_entries(url, playlist_id, webpage), diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 132d65bcaee5..9a3c75b627ee 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -467,13 +467,13 @@ def _real_extract(self, url): 'source_preference': 1, 'height': height, }) - elif format_id == 'hls': + elif format_id.startswith('hls') and format_id != 'hls_live_playback': fmts, subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False, live=is_live) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) - elif format_id.startswith('dash_'): + elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'): fmts, subs = self._extract_mpd_formats_and_subtitles( format_url, video_id, mpd_id=format_id, fatal=False) formats.extend(fmts) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 5f458ea45474..42803bb6dfec 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2525,7 +2525,7 @@ def fixup(url): return False # "#" cannot be stripped out since it is part of the URI # However, it can be safely stripped out if following a whitespace - return re.split(r'\s#', url, 1)[0].rstrip() + return re.split(r'\s#', url, maxsplit=1)[0].rstrip() with 
contextlib.closing(batch_fd) as fd: return [url for url in map(fixup, fd) if url] From ae2af1104f80caf2f47544763a33db2c17a3e1de Mon Sep 17 00:00:00 2001 From: bashonly <bashonly@protonmail.com> Date: Sun, 26 May 2024 16:46:31 -0500 Subject: [PATCH 034/145] [cleanup] Misc Authored by: bashonly, seproDev, Grub4K --- devscripts/changelog_override.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 4be1e58d4327..86e8ec2f99a9 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -163,5 +163,11 @@ "when": "beaf832c7a9d57833f365ce18f6115b88071b296", "short": "[ie/soundcloud] Add `formats` extractor-arg (#10004)", "authors": ["bashonly", "Grub4K"] + }, + { + "action": "change", + "when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6", + "short": "[cleanup] Misc (#9765)", + "authors": ["bashonly", "Grub4K", "seproDev"] } ] From ed274b60b1ad0193fcf8f4ebb6189b4b865525c6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 26 May 2024 21:55:43 +0000 Subject: [PATCH 035/145] Release 2024.05.26 Created by: bashonly :ci skip all :ci run dl --- CONTRIBUTORS | 20 +++++++++ Changelog.md | 110 ++++++++++++++++++++++++++++++++++++++++++++++ README.md | 21 ++++----- supportedsites.md | 37 ++++++++++------ yt_dlp/version.py | 6 +-- 5 files changed, 167 insertions(+), 27 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 8b5d19a64f37..b2a476bea8a1 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -610,3 +610,23 @@ Offert4324 sta1us Tomoka1 trwstin +alexhuot1 +clienthax +DaPotato69 +emqi +hugohaa +imanoreotwe +JakeFinley96 +lostfictions +minamotorin +ocococococ +Podiumnoche +RasmusAntons +roeniss +shoxie007 +Szpachlarz +The-MAGI +TuxCoder +voidful +vtexier +WyohKnott diff --git a/Changelog.md b/Changelog.md index 6cf08beab41c..0d27f1a921de 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 
+4,116 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.05.26 + +#### Core changes +- [Better warning when requested subs format not found](https://github.com/yt-dlp/yt-dlp/commit/7e4259dff0b681a3f0e8a930799ce0394328c86e) ([#9873](https://github.com/yt-dlp/yt-dlp/issues/9873)) by [DaPotato69](https://github.com/DaPotato69) +- [Merged with youtube-dl a08f2b7](https://github.com/yt-dlp/yt-dlp/commit/a4da9db87b6486b270c15dfa07ab5bfedc83f6bd) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- [Warn if lack of ffmpeg alters format selection](https://github.com/yt-dlp/yt-dlp/commit/96da9525043f78aca4544d01761b13b2140e9ae6) ([#9805](https://github.com/yt-dlp/yt-dlp/issues/9805)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) +- **cookies** + - [Add `--cookies-from-browser` support for Whale](https://github.com/yt-dlp/yt-dlp/commit/dd9ad97b1fbdd36c086b8ba82328a4d954f78f8e) ([#9649](https://github.com/yt-dlp/yt-dlp/issues/9649)) by [roeniss](https://github.com/roeniss) + - [Get chrome session cookies with `--cookies-from-browser`](https://github.com/yt-dlp/yt-dlp/commit/f1f158976e38d38a260762accafe7bbe6d451151) ([#9747](https://github.com/yt-dlp/yt-dlp/issues/9747)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +- **windows**: [Improve shell quoting and tests](https://github.com/yt-dlp/yt-dlp/commit/64766459e37451b665c1464073c28361fbcf1c25) ([#9802](https://github.com/yt-dlp/yt-dlp/issues/9802)) by [Grub4K](https://github.com/Grub4K) (With fixes in [7e26bd5](https://github.com/yt-dlp/yt-dlp/commit/7e26bd53f9c5893518fde81dfd0079ec08dd841e)) + +#### Extractor changes +- [Add POST data hash to `--write-pages` filenames](https://github.com/yt-dlp/yt-dlp/commit/61b17437dc14a1c7e90ff48a6198df77828c6df4) ([#9879](https://github.com/yt-dlp/yt-dlp/issues/9879)) by [minamotorin](https://github.com/minamotorin) 
(With fixes in [c999bac](https://github.com/yt-dlp/yt-dlp/commit/c999bac02c5a4f755b2a82488a975e91c988ffd8) by [bashonly](https://github.com/bashonly)) +- [Make `_search_nextjs_data` non fatal](https://github.com/yt-dlp/yt-dlp/commit/3ee1194288981c4f2c4abd8315326de0c424d2ce) ([#8937](https://github.com/yt-dlp/yt-dlp/issues/8937)) by [Grub4K](https://github.com/Grub4K) +- **afreecatv**: live: [Add `cdn` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/315b3544296bb83012e20ee3af9d3cbf5600dd1c) ([#9666](https://github.com/yt-dlp/yt-dlp/issues/9666)) by [bashonly](https://github.com/bashonly) +- **alura**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fc2879ecb05aaad36869609d154e4321362c1f63) ([#9658](https://github.com/yt-dlp/yt-dlp/issues/9658)) by [hugohaa](https://github.com/hugohaa) +- **artetv**: [Label forced subtitles](https://github.com/yt-dlp/yt-dlp/commit/7b5674949fd03a33b47b67b31d56a5adf1c48c91) ([#9945](https://github.com/yt-dlp/yt-dlp/issues/9945)) by [vtexier](https://github.com/vtexier) +- **bbc**: [Fix and extend extraction](https://github.com/yt-dlp/yt-dlp/commit/7975ddf245d22af034d5b983eeb1c5ec6c2ce053) ([#9705](https://github.com/yt-dlp/yt-dlp/issues/9705)) by [dirkf](https://github.com/dirkf), [kylegustavo](https://github.com/kylegustavo), [pukkandan](https://github.com/pukkandan) +- **bilibili**: [Fix `--geo-verification-proxy` support](https://github.com/yt-dlp/yt-dlp/commit/2338827072dacab0f15348b70aec8685feefc8d1) ([#9817](https://github.com/yt-dlp/yt-dlp/issues/9817)) by [fireattack](https://github.com/fireattack) +- **bilibilispacevideo** + - [Better error message](https://github.com/yt-dlp/yt-dlp/commit/06d52c87314e0bbc16c43c405090843885577b88) ([#9839](https://github.com/yt-dlp/yt-dlp/issues/9839)) by [fireattack](https://github.com/fireattack) + - [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/4cc99d7b6cce8b39506ead01407445d576b63ee4) ([#9905](https://github.com/yt-dlp/yt-dlp/issues/9905)) by 
[c-basalt](https://github.com/c-basalt) +- **boosty**: [Add cookies support](https://github.com/yt-dlp/yt-dlp/commit/145dc6f6563e80d2da1b3e9aea2ffa795b71622c) ([#9522](https://github.com/yt-dlp/yt-dlp/issues/9522)) by [RasmusAntons](https://github.com/RasmusAntons) +- **brilliantpala**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/eead3bbc01f6529862bdad1f0b2adeabda4f006e) ([#9788](https://github.com/yt-dlp/yt-dlp/issues/9788)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **canalalpha**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/00a9f2e1f7fa69499221f2e8dd73a08efeef79bc) ([#9675](https://github.com/yt-dlp/yt-dlp/issues/9675)) by [kclauhk](https://github.com/kclauhk) +- **cbc.ca**: player: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8bf48f3a8fa29587e7c73ef5a7710385a5ea725) ([#9866](https://github.com/yt-dlp/yt-dlp/issues/9866)) by [carusocr](https://github.com/carusocr) +- **cda**: [Fix age-gated web extraction](https://github.com/yt-dlp/yt-dlp/commit/6d8a53d870ff6795f509085bfbf3981417999038) ([#9939](https://github.com/yt-dlp/yt-dlp/issues/9939)) by [dirkf](https://github.com/dirkf), [emqi](https://github.com/emqi), [Podiumnoche](https://github.com/Podiumnoche), [Szpachlarz](https://github.com/Szpachlarz) +- **commonmistakes**: [Raise error on blob URLs](https://github.com/yt-dlp/yt-dlp/commit/98d71d8c5e5dab08b561ee6f137e968d2a004262) ([#9897](https://github.com/yt-dlp/yt-dlp/issues/9897)) by [seproDev](https://github.com/seproDev) +- **crunchyroll** + - [Always make metadata available](https://github.com/yt-dlp/yt-dlp/commit/cb2fb4a643949322adba561ca73bcba3221ec0c5) ([#9772](https://github.com/yt-dlp/yt-dlp/issues/9772)) by [bashonly](https://github.com/bashonly) + - [Fix auth and remove cookies support](https://github.com/yt-dlp/yt-dlp/commit/ff38a011d57b763f3a69bebd25a5dc9044a717ce) ([#9749](https://github.com/yt-dlp/yt-dlp/issues/9749)) by [bashonly](https://github.com/bashonly) + - [Fix stream 
extraction](https://github.com/yt-dlp/yt-dlp/commit/f2816634e3be88fe158b342ee33918de3c272a54) ([#10005](https://github.com/yt-dlp/yt-dlp/issues/10005)) by [bashonly](https://github.com/bashonly) + - [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/5904853ae5788509fdc4892cb7ecdfa9ae7f78e6) ([#9857](https://github.com/yt-dlp/yt-dlp/issues/9857)) by [bashonly](https://github.com/bashonly) +- **dangalplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0d067e77c3f5527946fb0c22ee1c7011994cba40) ([#10021](https://github.com/yt-dlp/yt-dlp/issues/10021)) by [bashonly](https://github.com/bashonly) +- **discoveryplus**: [Fix dmax.de and related extractors](https://github.com/yt-dlp/yt-dlp/commit/90d2da311bbb5dc06f385ee428c7e4590936e995) ([#10020](https://github.com/yt-dlp/yt-dlp/issues/10020)) by [bashonly](https://github.com/bashonly) +- **eplus**: [Handle URLs without videos](https://github.com/yt-dlp/yt-dlp/commit/351dc0bc334c4e1b5f00c152818c3ec0ed71f788) ([#9855](https://github.com/yt-dlp/yt-dlp/issues/9855)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **europarlwebstream**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/800a43983e5fb719526ce4cb3956216085c63268) ([#9647](https://github.com/yt-dlp/yt-dlp/issues/9647)) by [seproDev](https://github.com/seproDev), [voidful](https://github.com/voidful) +- **facebook**: [Fix DASH formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e3b42d8b1b8bcfff7ba146c19fc3f6f6ba843cea) ([#9734](https://github.com/yt-dlp/yt-dlp/issues/9734)) by [bashonly](https://github.com/bashonly) +- **godresource**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65e709d23530959075816e966c42179ad46e8e3b) ([#9629](https://github.com/yt-dlp/yt-dlp/issues/9629)) by [HobbyistDev](https://github.com/HobbyistDev) +- **googledrive**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/85ec2a337ac325cf6427cbafd56f0a034c1a5218) 
([#9908](https://github.com/yt-dlp/yt-dlp/issues/9908)) by [WyohKnott](https://github.com/WyohKnott) +- **hearthisat**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5bbfdb7c999b22f1aeca0c3489c167d6eb73013b) ([#9949](https://github.com/yt-dlp/yt-dlp/issues/9949)) by [bohwaz](https://github.com/bohwaz), [seproDev](https://github.com/seproDev) +- **hytale**: [Use `CloudflareStreamIE` explicitly](https://github.com/yt-dlp/yt-dlp/commit/31b417e1d1ccc67d5c027bf8878f483dc34cb118) ([#9672](https://github.com/yt-dlp/yt-dlp/issues/9672)) by [llamasblade](https://github.com/llamasblade) +- **instagram**: [Support `/reels/` URLs](https://github.com/yt-dlp/yt-dlp/commit/06cb0638392b607b47d3c2ac48eb2ebecb0f060d) ([#9539](https://github.com/yt-dlp/yt-dlp/issues/9539)) by [amir16yp](https://github.com/amir16yp) +- **jiocinema**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1463945ae5fb05986a0bd1aa02e41d1a08d93a02) ([#10026](https://github.com/yt-dlp/yt-dlp/issues/10026)) by [bashonly](https://github.com/bashonly) +- **jiosaavn**: [Extract via API and fix playlists](https://github.com/yt-dlp/yt-dlp/commit/0c21c53885cf03f4040467ae8c44d7ff51016116) ([#9656](https://github.com/yt-dlp/yt-dlp/issues/9656)) by [bashonly](https://github.com/bashonly) +- **lci**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a2eebc76770fca91ffabeff658d560f716fec80) ([#10025](https://github.com/yt-dlp/yt-dlp/issues/10025)) by [ocococococ](https://github.com/ocococococ) +- **mixch**: [Extract comments](https://github.com/yt-dlp/yt-dlp/commit/b38018b781b062d5169d104ab430489aef8e7f1e) ([#9860](https://github.com/yt-dlp/yt-dlp/issues/9860)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **moviepilot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/296df0da1d38a44d34c99b60a18066c301774537) ([#9366](https://github.com/yt-dlp/yt-dlp/issues/9366)) by [panatexxa](https://github.com/panatexxa) +- **netease**: program: [Improve `--no-playlist` 
message](https://github.com/yt-dlp/yt-dlp/commit/73f12119b52d98281804b0c072b2ed6aa841ec88) ([#9488](https://github.com/yt-dlp/yt-dlp/issues/9488)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **nfb**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0a1a8e3005f66c44bf67633dccd4df19c3fccd1a) ([#9650](https://github.com/yt-dlp/yt-dlp/issues/9650)) by [rrgomes](https://github.com/rrgomes) +- **ntslive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be7db1a5a8c483726c511c30ea4689cbb8b27962) ([#9641](https://github.com/yt-dlp/yt-dlp/issues/9641)) by [lostfictions](https://github.com/lostfictions) +- **orf**: on: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/0dd53faeca2ba0ce138e4092d07b5f2dbf2422f9) ([#9677](https://github.com/yt-dlp/yt-dlp/issues/9677)) by [TuxCoder](https://github.com/TuxCoder) +- **orftvthek**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/3779f2a307ba3ef1d28e107cdd71b221dfb4eb36) ([#10011](https://github.com/yt-dlp/yt-dlp/issues/10011)) by [seproDev](https://github.com/seproDev) +- **patreon** + - [Extract multiple embeds](https://github.com/yt-dlp/yt-dlp/commit/036e0d92c6052465673d459678322ea03e61483d) ([#9850](https://github.com/yt-dlp/yt-dlp/issues/9850)) by [bashonly](https://github.com/bashonly) + - [Fix Vimeo embed extraction](https://github.com/yt-dlp/yt-dlp/commit/c9ce57d9bf51541da2381d99bc096a9d0ddf1f27) ([#9712](https://github.com/yt-dlp/yt-dlp/issues/9712)) by [bashonly](https://github.com/bashonly) +- **piapro**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3ba8de62d61d782256f5c1e9939a0762039657de) ([#9311](https://github.com/yt-dlp/yt-dlp/issues/9311)) by [FinnRG](https://github.com/FinnRG), [seproDev](https://github.com/seproDev) +- **pornhub**: [Fix login by email address](https://github.com/yt-dlp/yt-dlp/commit/518c1afc1592cae3e4eb39dc646b5bc059333112) ([#9914](https://github.com/yt-dlp/yt-dlp/issues/9914)) by [feederbox826](https://github.com/feederbox826) +- **qub**: 
[Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b54cccdcb892bca3e55993480d8b86f1c7e6da6) ([#7019](https://github.com/yt-dlp/yt-dlp/issues/7019)) by [alexhuot1](https://github.com/alexhuot1), [dirkf](https://github.com/dirkf) +- **reddit**: [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/82f4f4444e26daf35b7302c406fe2312f78f619e) ([#10006](https://github.com/yt-dlp/yt-dlp/issues/10006)) by [kclauhk](https://github.com/kclauhk) +- **soundcloud** + - [Add `formats` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/beaf832c7a9d57833f365ce18f6115b88071b296) ([#10004](https://github.com/yt-dlp/yt-dlp/issues/10004)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - [Extract `genres`](https://github.com/yt-dlp/yt-dlp/commit/231c2eacc41b06b65c63edf94c0d04768a5da607) ([#9821](https://github.com/yt-dlp/yt-dlp/issues/9821)) by [bashonly](https://github.com/bashonly) +- **taptap**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/63b569bc5e7d461753637a20ad84a575adee4c0a) ([#9776](https://github.com/yt-dlp/yt-dlp/issues/9776)) by [c-basalt](https://github.com/c-basalt) +- **tele5**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/c92e4e625e9e6bbbbf8e3b20c3e7ebe57c16072d) ([#10024](https://github.com/yt-dlp/yt-dlp/issues/10024)) by [bashonly](https://github.com/bashonly) +- **theatercomplextown**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/8056a3026ed6ec6a6d0ed56fdd7ebcd16e928341) ([#9754](https://github.com/yt-dlp/yt-dlp/issues/9754)) by [bashonly](https://github.com/bashonly) +- **tiktok** + - [Add `device_id` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/3584b8390bd21c0393a3079eeee71aed56a1c1d8) ([#9951](https://github.com/yt-dlp/yt-dlp/issues/9951)) by [bashonly](https://github.com/bashonly) + - [Extract all web formats](https://github.com/yt-dlp/yt-dlp/commit/4ccd73fea0f6f4be343e1ec7f22dd03799addcf8) ([#9960](https://github.com/yt-dlp/yt-dlp/issues/9960)) by 
[bashonly](https://github.com/bashonly) + - [Extract via mobile API only if extractor-arg is passed](https://github.com/yt-dlp/yt-dlp/commit/41ba4a808b597a3afed78c89675a30deb6844450) ([#9938](https://github.com/yt-dlp/yt-dlp/issues/9938)) by [bashonly](https://github.com/bashonly) + - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/eef1e9f44ff14c5e65b759bb1eafa3946cdaf719) ([#9961](https://github.com/yt-dlp/yt-dlp/issues/9961)) by [bashonly](https://github.com/bashonly) + - collection: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/119d41f27061d220d276a2d38cfc8d873437452a) ([#9986](https://github.com/yt-dlp/yt-dlp/issues/9986)) by [bashonly](https://github.com/bashonly), [imanoreotwe](https://github.com/imanoreotwe) + - user: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/347f13dd9bccc2b4db3ea25689410d45d8370ed4) ([#9661](https://github.com/yt-dlp/yt-dlp/issues/9661)) by [bashonly](https://github.com/bashonly) +- **tv5monde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6db96268c521e945d42649607db1574f5d92e082) ([#9143](https://github.com/yt-dlp/yt-dlp/issues/9143)) by [alard](https://github.com/alard), [seproDev](https://github.com/seproDev) +- **twitter** + - [Fix auth for x.com migration](https://github.com/yt-dlp/yt-dlp/commit/3e35aa32c74bc108375be8c8b6b3bfc90dfff1b4) ([#9952](https://github.com/yt-dlp/yt-dlp/issues/9952)) by [bashonly](https://github.com/bashonly) + - [Support x.com URLs](https://github.com/yt-dlp/yt-dlp/commit/4813173e4544f125d6f2afc31e600727d761b8dd) ([#9926](https://github.com/yt-dlp/yt-dlp/issues/9926)) by [bashonly](https://github.com/bashonly) +- **vk**: [Improve format extraction](https://github.com/yt-dlp/yt-dlp/commit/df5c9e733aaba703cf285c0372b6d61629330c82) ([#9885](https://github.com/yt-dlp/yt-dlp/issues/9885)) by [seproDev](https://github.com/seproDev) +- **wrestleuniverse**: [Avoid partial stream 
formats](https://github.com/yt-dlp/yt-dlp/commit/c4853655cb9a793129280806af643de43c48f4d5) ([#9800](https://github.com/yt-dlp/yt-dlp/issues/9800)) by [bashonly](https://github.com/bashonly) +- **xiaohongshu**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a2e9031605d87c469be9ce98dbbdf4960b727338) ([#9646](https://github.com/yt-dlp/yt-dlp/issues/9646)) by [HobbyistDev](https://github.com/HobbyistDev) +- **xvideos**: quickies: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b207d26f83fb8ab0ce56df74dff43ff583a3264f) ([#9834](https://github.com/yt-dlp/yt-dlp/issues/9834)) by [JakeFinley96](https://github.com/JakeFinley96) +- **youporn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/351368cb9a6731b886a58f5a10fd6b302bbe47be) ([#8827](https://github.com/yt-dlp/yt-dlp/issues/8827)) by [The-MAGI](https://github.com/The-MAGI) +- **youtube** + - [Add `mediaconnect` client](https://github.com/yt-dlp/yt-dlp/commit/cf212d0a331aba05c32117573f760cdf3af8c62f) ([#9546](https://github.com/yt-dlp/yt-dlp/issues/9546)) by [clienthax](https://github.com/clienthax) + - [Extract upload timestamp if available](https://github.com/yt-dlp/yt-dlp/commit/96a134dea6397a5f2131947c427aac52c8b4e677) ([#9856](https://github.com/yt-dlp/yt-dlp/issues/9856)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/8e15177b4113c355989881e4e030f695a9b59c3a) ([#9775](https://github.com/yt-dlp/yt-dlp/issues/9775)) by [bbilly1](https://github.com/bbilly1), [jakeogh](https://github.com/jakeogh), [minamotorin](https://github.com/minamotorin), [shoxie007](https://github.com/shoxie007) + - [Remove `android` from default clients](https://github.com/yt-dlp/yt-dlp/commit/12d8ea8246fa901de302ff5cc748caddadc82f41) ([#9553](https://github.com/yt-dlp/yt-dlp/issues/9553)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz) +- **zenyandex**: [Fix 
extractor](https://github.com/yt-dlp/yt-dlp/commit/c4b87dd885ee5391e5f481e7c8bd550a7c543623) ([#9813](https://github.com/yt-dlp/yt-dlp/issues/9813)) by [src-tinkerer](https://github.com/src-tinkerer) + +#### Networking changes +- [Add `extensions` attribute to `Response`](https://github.com/yt-dlp/yt-dlp/commit/bec9a59e8ec82c18e3bf9268eaa436793dd52e35) ([#9756](https://github.com/yt-dlp/yt-dlp/issues/9756)) by [bashonly](https://github.com/bashonly) +- **Request Handler** + - requests + - [Patch support for `requests` 2.32.2+](https://github.com/yt-dlp/yt-dlp/commit/3f7999533ebe41c2a579d91b4e4cb211cfcd3bc0) ([#9992](https://github.com/yt-dlp/yt-dlp/issues/9992)) by [Grub4K](https://github.com/Grub4K) + - [Update to `requests` 2.32.0](https://github.com/yt-dlp/yt-dlp/commit/c36513f1be2ef3d3cec864accbffda1afaa06ffd) ([#9980](https://github.com/yt-dlp/yt-dlp/issues/9980)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- [Add `hatch`, `ruff`, `pre-commit` and improve dev docs](https://github.com/yt-dlp/yt-dlp/commit/e897bd8292a41999cf51dba91b390db5643c72db) ([#7409](https://github.com/yt-dlp/yt-dlp/issues/7409)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **build** + - [Migrate `linux_exe` to static musl builds](https://github.com/yt-dlp/yt-dlp/commit/ac817bc83efd939dca3e40c4b527d0ccfc77172b) ([#9811](https://github.com/yt-dlp/yt-dlp/issues/9811)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - [Normalize `curl_cffi` group to `curl-cffi`](https://github.com/yt-dlp/yt-dlp/commit/02483bea1c4dbe1bace8ca4d19700104fbb8a00f) ([#9698](https://github.com/yt-dlp/yt-dlp/issues/9698)) by [bashonly](https://github.com/bashonly) (With fixes in [89f535e](https://github.com/yt-dlp/yt-dlp/commit/89f535e2656964b4061c25a7739d4d6ba0a30568)) + - [Run `macos_legacy` job on 
`macos-12`](https://github.com/yt-dlp/yt-dlp/commit/1a366403d9c26b992faa77e00f4d02ead57559e3) ([#9804](https://github.com/yt-dlp/yt-dlp/issues/9804)) by [bashonly](https://github.com/bashonly) + - [`macos` job requires `setuptools<70`](https://github.com/yt-dlp/yt-dlp/commit/78c57cc0e0998b8ed90e4306f410aa4be4115cd7) ([#9993](https://github.com/yt-dlp/yt-dlp/issues/9993)) by [bashonly](https://github.com/bashonly) +- **cleanup** + - [Remove questionable extractors](https://github.com/yt-dlp/yt-dlp/commit/01395a34345d1c6ba1b73ca92f94dd200dc45341) ([#9911](https://github.com/yt-dlp/yt-dlp/issues/9911)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [5c019f6](https://github.com/yt-dlp/yt-dlp/commit/5c019f6328ad40d66561eac3c4de0b3cd070d0f6), [ae2af11](https://github.com/yt-dlp/yt-dlp/commit/ae2af1104f80caf2f47544763a33db2c17a3e1de) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **test** + - [Add HTTP proxy tests](https://github.com/yt-dlp/yt-dlp/commit/3c7a287e281d9f9a353dce8902ff78a84c24a040) ([#9578](https://github.com/yt-dlp/yt-dlp/issues/9578)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix connect timeout test](https://github.com/yt-dlp/yt-dlp/commit/53b4d44f55cca66ac33dab092ef2a30b1164b684) ([#9906](https://github.com/yt-dlp/yt-dlp/issues/9906)) by [coletdjnz](https://github.com/coletdjnz) + ### 2024.04.09 #### Important changes diff --git a/README.md b/README.md index 52c80f26efc3..e757567b5a4c 100644 --- a/README.md +++ b/README.md @@ -665,16 +665,17 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git The name of the browser to load cookies from. Currently supported browsers are: brave, chrome, chromium, edge, firefox, - opera, safari, vivaldi, whale. 
Optionally, the - KEYRING used for decrypting Chromium cookies - on Linux, the name/path of the PROFILE to - load cookies from, and the CONTAINER name - (if Firefox) ("none" for no container) can - be given with their respective seperators. - By default, all containers of the most - recently accessed profile are used. - Currently supported keyrings are: basictext, - gnomekeyring, kwallet, kwallet5, kwallet6 + opera, safari, vivaldi, whale. Optionally, + the KEYRING used for decrypting Chromium + cookies on Linux, the name/path of the + PROFILE to load cookies from, and the + CONTAINER name (if Firefox) ("none" for no + container) can be given with their + respective seperators. By default, all + containers of the most recently accessed + profile are used. Currently supported + keyrings are: basictext, gnomekeyring, + kwallet, kwallet5, kwallet6 --no-cookies-from-browser Do not load cookies from browser (default) --cache-dir DIR Location in the filesystem where yt-dlp can store some downloaded information (such as diff --git a/supportedsites.md b/supportedsites.md index ba77c0feb004..387395613379 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -14,7 +14,6 @@ - **6play** - **7plus** - **8tracks** - - **91porn** - **9c9media** - **9gag**: 9GAG - **9News** @@ -220,7 +219,7 @@ - **BusinessInsider** - **BuzzFeed** - **BYUtv**: (**Currently broken**) - - **CableAV** + - **CaffeineTV** - **Callin** - **Caltrans** - **CAM4** @@ -333,6 +332,8 @@ - **DailyWirePodcast** - **damtomo:record** - **damtomo:video** + - **dangalplay**: [*dangalplay*](## "netrc machine") + - **dangalplay:season**: [*dangalplay*](## "netrc machine") - **daum.net** - **daum.net:clip** - **daum.net:playlist** @@ -396,7 +397,6 @@ - **EinsUndEinsTV**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine") - - **Einthusan** - **eitb.tv** - **ElementorEmbed** - **Elonet** @@ -498,6 +498,7 @@ - 
**GameStar** - **Gaskrank** - **Gazeta**: (**Currently broken**) + - **GBNews**: GB News clips, features and live streams - **GDCVault**: [*gdcvault*](## "netrc machine") (**Currently broken**) - **GediDigital** - **gem.cbc.ca**: [*cbcgem*](## "netrc machine") @@ -527,6 +528,7 @@ - **GMANetworkVideo** - **Go** - **GoDiscovery** + - **GodResource** - **GodTube**: (**Currently broken**) - **Gofile** - **Golem** @@ -630,11 +632,11 @@ - **iwara:user**: [*iwara*](## "netrc machine") - **Ixigua** - **Izlesene** - - **Jable** - - **JablePlaylist** - **Jamendo** - **JamendoAlbum** - **JeuxVideo**: (**Currently broken**) + - **jiocinema**: [*jiocinema*](## "netrc machine") + - **jiocinema:series**: [*jiocinema*](## "netrc machine") - **jiosaavn:album** - **jiosaavn:playlist** - **jiosaavn:song** @@ -974,6 +976,7 @@ - **NRKTVSeason** - **NRKTVSeries** - **NRLTV**: (**Currently broken**) + - **nts.live** - **ntv.ru** - **NubilesPorn**: [*nubiles-porn*](## "netrc machine") - **nuum:live** @@ -1015,7 +1018,6 @@ - **orf:on** - **orf:podcast** - **orf:radio** - - **orf:tvthek**: ORF TVthek - **OsnatelTV**: [*osnateltv*](## "netrc machine") - **OsnatelTVLive**: [*osnateltv*](## "netrc machine") - **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine") @@ -1394,6 +1396,10 @@ - **SztvHu** - **t-online.de**: (**Currently broken**) - **Tagesschau**: (**Currently broken**) + - **TapTapApp** + - **TapTapAppIntl** + - **TapTapMoment** + - **TapTapPostIntl** - **Tass**: (**Currently broken**) - **TBS** - **TBSJPEpisode** @@ -1412,7 +1418,7 @@ - **TedSeries** - **TedTalk** - **Tele13** - - **Tele5**: (**Currently broken**) + - **Tele5** - **TeleBruxelles** - **TelecaribePlay** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es @@ -1452,11 +1458,12 @@ - **ThreeSpeak** - **ThreeSpeakUser** - **TikTok** + - **tiktok:collection** - **tiktok:effect**: (**Currently broken**) - **tiktok:live** - **tiktok:sound**: (**Currently broken**) - **tiktok:tag**: (**Currently broken**) - - 
**tiktok:user**: (**Currently broken**) + - **tiktok:user** - **TLC** - **TMZ** - **TNAFlix** @@ -1501,7 +1508,7 @@ - **tv2play.hu** - **tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - - **TV5MondePlus**: TV5MONDE+ + - **TV5MONDE** - **tv5unis** - **tv5unis:video** - **tv8.it** @@ -1639,8 +1646,6 @@ - **voicy**: (**Currently broken**) - **voicy:channel**: (**Currently broken**) - **VolejTV** - - **Voot**: [*voot*](## "netrc machine") (**Currently broken**) - - **VootSeries**: [*voot*](## "netrc machine") (**Currently broken**) - **VoxMedia** - **VoxMediaVolume** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl @@ -1715,10 +1720,10 @@ - **wykop:​post:comment** - **Xanimu** - **XboxClips** - - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing - **XHamster** - **XHamsterEmbed** - **XHamsterUser** + - **XiaoHongShu**: 小红书 - **ximalaya**: 喜马拉雅FM - **ximalaya:album**: 喜马拉雅FM 专辑 - **xinpianchang**: xinpianchang.com (**Currently broken**) @@ -1749,8 +1754,12 @@ - **YouNowLive** - **YouNowMoment** - **YouPorn** - - **YourPorn** - - **YourUpload** + - **YouPornCategory**: YouPorn category, with sorting, filtering and pagination + - **YouPornChannel**: YouPorn channel, with sorting and pagination + - **YouPornCollection**: YouPorn collection (user playlist), with sorting and pagination + - **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination + - **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination + - **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination - **youtube**: YouTube - **youtube:clip** - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 22c2c048d859..415dc0eaf932 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # 
Autogenerated by devscripts/update-version.py -__version__ = '2024.04.09' +__version__ = '2024.05.26' -RELEASE_GIT_HEAD = 'ff07792676f404ffff6ee61b5638c9dc1a33a37a' +RELEASE_GIT_HEAD = 'ae2af1104f80caf2f47544763a33db2c17a3e1de' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.04.09' +_pkg_version = '2024.05.26' From 26603d0b34898818992bee4598e0607c07059511 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Mon, 27 May 2024 00:06:34 +0200 Subject: [PATCH 036/145] [ie] Fix parsing of base URL in SMIL manifest (#9225) Authored by: seproDev --- yt_dlp/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b99b7e5ab29f..1d2c443c0b75 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2451,7 +2451,7 @@ def _parse_smil_formats_and_subtitles( }) continue - src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src) + src_url = src if src.startswith('http') else urllib.parse.urljoin(f'{base}/', src) src_url = src_url.strip() if proto == 'm3u8' or src_ext == 'm3u8': From ae2194e1dd4a99d32eb3cab7c48a0ff03101ef3b Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Mon, 27 May 2024 01:24:03 +0200 Subject: [PATCH 037/145] [ie/Piksel] Update domain (#9223) Authored by: seproDev --- yt_dlp/extractor/piksel.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index 8870d7b9924c..02ae2fe1aac6 100644 --- a/yt_dlp/extractor/piksel.py +++ b/yt_dlp/extractor/piksel.py @@ -25,29 +25,31 @@ class PikselIE(InfoExtractor): )| (?:api|player)\.multicastmedia| (?:api-ovp|player)\.piksel - )\.com| + )\.(?:com|tech)| (?: mz-edge\.stream\.co| movie-s\.nhk\.or )\.jp| vidego\.baltimorecity\.gov )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)''' - _EMBED_REGEX = 
[r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)'] + _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.(?:com|tech)/v/[a-z0-9]+)'] _TESTS = [ { - 'url': 'http://player.piksel.com/v/ums2867l', + 'url': 'http://player.piksel.tech/v/ums2867l', 'md5': '34e34c8d89dc2559976a6079db531e85', 'info_dict': { 'id': 'ums2867l', 'ext': 'mp4', 'title': 'GX-005 with Caption', 'timestamp': 1481335659, - 'upload_date': '20161210' + 'upload_date': '20161210', + 'description': '', + 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1488331553/3238987.jpg?w=640&h=480', } }, { # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al - 'url': 'https://player.piksel.com/v/v80kqp41', + 'url': 'https://player.piksel.tech/v/v80kqp41', 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', 'info_dict': { 'id': 'v80kqp41', @@ -55,7 +57,8 @@ class PikselIE(InfoExtractor): 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. 
Robart presiding.', 'timestamp': 1486171129, - 'upload_date': '20170204' + 'upload_date': '20170204', + 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1495569155/3279887.jpg?w=640&h=360', } }, { @@ -65,7 +68,7 @@ class PikselIE(InfoExtractor): } ] - def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True): + def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.tech', fatal=True): url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5') response = traverse_obj( self._download_json(url, display_id, query=query, fatal=fatal), ('response', {dict})) or {} @@ -146,7 +149,7 @@ def process_asset_files(asset_files): smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil']) if smil_url: - transform_source = None + transform_source = lambda x: x.replace('src="/', 'src="') if ref_id == 'nhkworld': # TODO: figure out if this is something to be fixed in urljoin, # _parse_smil_formats or keep it here From c53c2e40fde8f2e15c7c62f8ca1a5d9e90ddc079 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 26 May 2024 23:22:46 -0500 Subject: [PATCH 038/145] [ie/tiktok:user] Fix extraction loop (#10035) Closes #10033 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index ab8efc19ed66..7bcfdedbeae7 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -940,6 +940,7 @@ def _build_web_query(self, sec_uid, cursor): def _entries(self, sec_uid, user_name): display_id = user_name or sec_uid + seen_ids = set() cursor = int(time.time() * 1E3) for page in itertools.count(1): @@ -949,6 +950,9 @@ def _entries(self, sec_uid, user_name): for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): video_id = video['id'] + if video_id in seen_ids: + continue + 
seen_ids.add(video_id) webpage_url = self._create_url(display_id, video_id) yield self.url_result( webpage_url, TikTokIE, @@ -956,8 +960,8 @@ def _entries(self, sec_uid, user_name): old_cursor = cursor cursor = traverse_obj( - response, ('itemList', -1, 'createTime', {functools.partial(int_or_none, invscale=1E3)})) - if not cursor: + response, ('itemList', -1, 'createTime', {lambda x: int(x * 1E3)})) + if not cursor or old_cursor == cursor: # User may not have posted within this ~1 week lookback, so manually adjust cursor cursor = old_cursor - 7 * 86_400_000 # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed From 5e3e19c93c52830da98d9d1ed84ea7a559efefbd Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 27 May 2024 16:46:07 -0500 Subject: [PATCH 039/145] [cleanup] Misc (#10043) Authored by: bashonly --- README.md | 3 +++ yt_dlp/options.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e757567b5a4c..e8cd6d3a06f8 100644 --- a/README.md +++ b/README.md @@ -401,6 +401,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --impersonate CLIENT[:OS] Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. Pass --impersonate="" to impersonate any client. + Note that forcing impersonation for all + requests may have a detrimental impact on + download speed and stability --list-impersonate-targets List available clients to impersonate. -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 997b575cd46a..9615bfbaa4d3 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -520,7 +520,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): metavar='CLIENT[:OS]', dest='impersonate', default=None, help=( 'Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. 
' - 'Pass --impersonate="" to impersonate any client.'), + 'Pass --impersonate="" to impersonate any client. Note that forcing impersonation ' + 'for all requests may have a detrimental impact on download speed and stability'), ) network.add_option( '--list-impersonate-targets', From 12b248ce60be1aa1362edd839d915bba70dbee4b Mon Sep 17 00:00:00 2001 From: trueauracoral <87541524+trueauracoral@users.noreply.github.com> Date: Mon, 27 May 2024 17:24:01 -0500 Subject: [PATCH 040/145] [ie/peertube] Support livestreams (#10044) Closes #2055 Authored by: trueauracoral, bashonly --- yt_dlp/extractor/peertube.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index b7919c0734bb..fb4d02562159 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -1470,11 +1470,15 @@ def _real_extract(self, url): title = video['name'] - formats = [] + formats, is_live = [], False files = video.get('files') or [] for playlist in (video.get('streamingPlaylists') or []): if not isinstance(playlist, dict): continue + if playlist_url := url_or_none(playlist.get('playlistUrl')): + is_live = True + formats.extend(self._extract_m3u8_formats( + playlist_url, video_id, fatal=False, live=True)) playlist_files = playlist.get('files') if not (playlist_files and isinstance(playlist_files, list)): continue @@ -1498,6 +1502,7 @@ def _real_extract(self, url): f['vcodec'] = 'none' else: f['fps'] = int_or_none(file_.get('fps')) + is_live = False formats.append(f) description = video.get('description') @@ -1555,6 +1560,7 @@ def channel_data(field, type_): 'categories': categories, 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, 'webpage_url': webpage_url, } From 111b61ddef305584d45a48e7b7c73ffcedf062a2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 22:35:55 +0000 Subject: [PATCH 041/145] Release 
2024.05.27 Created by: bashonly :ci skip all :ci run dl --- CONTRIBUTORS | 1 + Changelog.md | 11 +++++++++++ yt_dlp/version.py | 6 +++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index b2a476bea8a1..e0d1668ee2fa 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -630,3 +630,4 @@ TuxCoder voidful vtexier WyohKnott +trueauracoral diff --git a/Changelog.md b/Changelog.md index 0d27f1a921de..267330208e1d 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,17 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.05.27 + +#### Extractor changes +- [Fix parsing of base URL in SMIL manifest](https://github.com/yt-dlp/yt-dlp/commit/26603d0b34898818992bee4598e0607c07059511) ([#9225](https://github.com/yt-dlp/yt-dlp/issues/9225)) by [seproDev](https://github.com/seproDev) +- **peertube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/12b248ce60be1aa1362edd839d915bba70dbee4b) ([#10044](https://github.com/yt-dlp/yt-dlp/issues/10044)) by [bashonly](https://github.com/bashonly), [trueauracoral](https://github.com/trueauracoral) +- **piksel**: [Update domain](https://github.com/yt-dlp/yt-dlp/commit/ae2194e1dd4a99d32eb3cab7c48a0ff03101ef3b) ([#9223](https://github.com/yt-dlp/yt-dlp/issues/9223)) by [seproDev](https://github.com/seproDev) +- **tiktok**: user: [Fix extraction loop](https://github.com/yt-dlp/yt-dlp/commit/c53c2e40fde8f2e15c7c62f8ca1a5d9e90ddc079) ([#10035](https://github.com/yt-dlp/yt-dlp/issues/10035)) by [bashonly](https://github.com/bashonly) + +#### Misc. 
changes +- **cleanup**: Miscellaneous: [5e3e19c](https://github.com/yt-dlp/yt-dlp/commit/5e3e19c93c52830da98d9d1ed84ea7a559efefbd) by [bashonly](https://github.com/bashonly) + ### 2024.05.26 #### Core changes diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 415dc0eaf932..a90b288c9aae 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.05.26' +__version__ = '2024.05.27' -RELEASE_GIT_HEAD = 'ae2af1104f80caf2f47544763a33db2c17a3e1de' +RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.05.26' +_pkg_version = '2024.05.27' From bef9a9e5361fd7a72e21d0f1a8c8afb70d89e8c5 Mon Sep 17 00:00:00 2001 From: Ben Galliart <bgallia@gmail.com> Date: Tue, 28 May 2024 23:25:05 -0500 Subject: [PATCH 042/145] [ie/TubiTv] Fix extractor (#9975) Closes #9937 Authored by: chilinux --- yt_dlp/extractor/tubitv.py | 101 ++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 46 deletions(-) diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index bd46bc363001..78be86d5881b 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -7,33 +7,45 @@ int_or_none, js_to_json, traverse_obj, + url_or_none, urlencode_postdata, ) class TubiTvIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - tubitv:| - https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/ - ) - (?P<id>[0-9]+)''' + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' - _GEO_COUNTRIES = ['US'] _TESTS = [{ - 'url': 'https://tubitv.com/movies/383676/tracker', - 'md5': '566fa0f76870302d11af0de89511d3f0', + 'url': 'https://tubitv.com/movies/100004539/the-39-steps', 'info_dict': { - 'id': '383676', + 'id': '100004539', 'ext': 'mp4', - 'title': 'Tracker', - 'description': 
'md5:ff320baf43d0ad2655e538c1d5cd9706', - 'uploader_id': 'f866e2677ea2f0dff719788e4f7f9195', - 'release_year': 2010, + 'title': 'The 39 Steps', + 'description': 'md5:bb2f2dd337f0dc58c06cb509943f54c8', + 'uploader_id': 'abc2558d54505d4f0f32be94f2e7108c', + 'release_year': 1935, 'thumbnail': r're:^https?://.+\.(jpe?g|png)$', - 'duration': 6122, + 'duration': 5187, }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://tubitv.com/tv-shows/554628/s01-e01-rise-of-the-snakes', + 'info_dict': { + 'id': '554628', + 'ext': 'mp4', + 'title': 'S01:E01 - Rise of the Snakes', + 'description': 'md5:ba136f586de53af0372811e783a3f57d', + 'episode': 'Rise of the Snakes', + 'episode_number': 1, + 'season': 'Season 1', + 'season_number': 1, + 'uploader_id': '2a9273e728c510d22aa5c57d0646810b', + 'release_year': 2011, + 'thumbnail': r're:^https?://.+\.(jpe?g|png)$', + 'duration': 1376, + }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'md5': '43ac06be9326f41912dc64ccf7a80320', @@ -81,45 +93,39 @@ def _perform_login(self, username, password): 'Login failed (invalid username/password)', expected=True) def _real_extract(self, url): - video_id = self._match_id(url) - video_data = self._download_json(f'https://tubitv.com/oz/videos/{video_id}/content', video_id, query={ - 'video_resources': ['dash', 'hlsv3', 'hlsv6', *self._UNPLAYABLE_FORMATS], - }) - title = video_data['title'] + video_id, video_type = self._match_valid_url(url).group('id', 'type') + webpage = self._download_webpage(f'https://tubitv.com/{video_type}/{video_id}/', video_id) + video_data = self._search_json( + r'window\.__data\s*=', webpage, 'data', video_id, + transform_source=js_to_json)['video']['byId'][video_id] formats = [] drm_formats = False - for resource in video_data['video_resources']: - if resource['type'] in ('dash', ): - formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False) - 
elif resource['type'] in ('hlsv3', 'hlsv6'): - formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False) - elif resource['type'] in self._UNPLAYABLE_FORMATS: + for resource in traverse_obj(video_data, ('video_resources', lambda _, v: url_or_none(v['manifest']['url']))): + resource_type = resource.get('type') + manifest_url = resource['manifest']['url'] + if resource_type == 'dash': + formats.extend(self._extract_mpd_formats(manifest_url, video_id, mpd_id=resource_type, fatal=False)) + elif resource_type in ('hlsv3', 'hlsv6'): + formats.extend(self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id=resource_type, fatal=False)) + elif resource_type in self._UNPLAYABLE_FORMATS: drm_formats = True + else: + self.report_warning(f'Skipping unknown resource type "{resource_type}"') if not formats and drm_formats: self.report_drm(video_id) elif not formats and not video_data.get('policy_match'): # policy_match is False if content was removed raise ExtractorError('This content is currently unavailable', expected=True) - thumbnails = [] - for thumbnail_url in video_data.get('thumbnails', []): - if not thumbnail_url: - continue - thumbnails.append({ - 'url': self._proto_relative_url(thumbnail_url), - }) - subtitles = {} - for sub in video_data.get('subtitles', []): - sub_url = sub.get('url') - if not sub_url: - continue + for sub in traverse_obj(video_data, ('subtitles', lambda _, v: url_or_none(v['url']))): subtitles.setdefault(sub.get('lang', 'English'), []).append({ - 'url': self._proto_relative_url(sub_url), + 'url': self._proto_relative_url(sub['url']), }) + title = traverse_obj(video_data, ('title', {str})) season_number, episode_number, episode_title = self._search_regex( r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None)) @@ -128,18 +134,21 @@ def _real_extract(self, url): 'title': title, 'formats': formats, 'subtitles': subtitles, - 
'thumbnails': thumbnails, - 'description': video_data.get('description'), - 'duration': int_or_none(video_data.get('duration')), - 'uploader_id': video_data.get('publisher_id'), - 'release_year': int_or_none(video_data.get('year')), 'season_number': int_or_none(season_number), 'episode_number': int_or_none(episode_number), - 'episode_title': episode_title + 'episode': episode_title, + **traverse_obj(video_data, { + 'description': ('description', {str}), + 'duration': ('duration', {int_or_none}), + 'uploader_id': ('publisher_id', {str}), + 'release_year': ('year', {int_or_none}), + 'thumbnails': ('thumbnails', ..., {url_or_none}, {'url': {self._proto_relative_url}}), + }), } class TubiTvShowIE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)' _TESTS = [{ 'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true', @@ -160,7 +169,7 @@ def _entries(self, show_url, show_name): if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's': continue yield self.url_result( - 'tubitv:%s' % episode_id, + f'https://tubitv.com/tv-shows/{episode_id}/', ie=TubiTvIE.ie_key(), video_id=episode_id) def _real_extract(self, url): From 8b46ad4d8b8ee8c5472af0cde863baa89ca3f425 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Wed, 29 May 2024 23:16:57 +0200 Subject: [PATCH 043/145] [ie/orf:on] Support segmented episodes (#10053) Closes #9930 Authored by: seproDev --- yt_dlp/extractor/orf.py | 141 +++++++++++++++++++++++++++++++--------- 1 file changed, 111 insertions(+), 30 deletions(-) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 3c837becdb89..039f33bd66c5 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -12,7 +12,9 @@ mimetype2ext, orderedSet, parse_age_limit, + parse_iso8601, remove_end, + str_or_none, strip_jsonp, try_call, unified_strdate, @@ -390,7 +392,7 @@ def _real_extract(self, url): class 
ORFONIE(InfoExtractor): IE_NAME = 'orf:on' - _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)' + _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)(?:/(?P<segment>\d+))?' _TESTS = [{ 'url': 'https://on.orf.at/video/14210000/school-of-champions-48', 'info_dict': { @@ -401,10 +403,14 @@ class ORFONIE(InfoExtractor): 'title': 'School of Champions (4/8)', 'description': 'md5:d09ad279fc2e8502611e7648484b6afd', 'media_type': 'episode', - 'timestamp': 1706472362, - 'upload_date': '20240128', + 'timestamp': 1706558922, + 'upload_date': '20240129', + 'release_timestamp': 1706472362, + 'release_date': '20240128', + 'modified_timestamp': 1712756663, + 'modified_date': '20240410', '_old_archive_ids': ['orftvthek 14210000'], - } + }, }, { 'url': 'https://on.orf.at/video/3220355', 'md5': 'f94d98e667cf9a3851317efb4e136662', @@ -418,18 +424,87 @@ class ORFONIE(InfoExtractor): 'media_type': 'episode', 'timestamp': 52916400, 'upload_date': '19710905', + 'release_timestamp': 52916400, + 'release_date': '19710905', + 'modified_timestamp': 1498536049, + 'modified_date': '20170627', '_old_archive_ids': ['orftvthek 3220355'], - } + }, + }, { + # Video with multiple segments selecting the second segment + 'url': 'https://on.orf.at/video/14226549/15639808/jugendbande-einbrueche-aus-langeweile', + 'md5': '90f4ebff86b4580837b8a361d0232a9e', + 'info_dict': { + 'id': '15639808', + 'ext': 'mp4', + 'duration': 97.707, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0175/43/thumb_17442704_segments_highlight_teaser.jpg', + 'title': 'Jugendbande: Einbrüche aus Langeweile', + 'description': 'md5:193df0bf0d91cf16830c211078097120', + 'media_type': 'segment', + 'timestamp': 1715792400, + 'upload_date': '20240515', + 'modified_timestamp': 1715794394, + 'modified_date': '20240515', + '_old_archive_ids': ['orftvthek 15639808'], + }, + 'params': {'noplaylist': True}, + }, { + # Video with multiple segments and no combined version + 'url': 
'https://on.orf.at/video/14227864/formel-1-grosser-preis-von-monaco-2024', + 'info_dict': { + '_type': 'multi_video', + 'id': '14227864', + 'duration': 18410.52, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/04/thumb_17503881_segments_highlight_teaser.jpg', + 'title': 'Formel 1: Großer Preis von Monaco 2024', + 'description': 'md5:aeeb010710ccf70ce28ccb4482243d4f', + 'media_type': 'episode', + 'timestamp': 1716721200, + 'upload_date': '20240526', + 'release_timestamp': 1716721802, + 'release_date': '20240526', + 'modified_timestamp': 1716967501, + 'modified_date': '20240529', + }, + 'playlist_count': 42, + }, { + # Video with multiple segments, but with combined version + 'url': 'https://on.orf.at/video/14228172', + 'info_dict': { + 'id': '14228172', + 'ext': 'mp4', + 'duration': 3294.878, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/17/thumb_17516455_segments_highlight_teaser.jpg', + 'title': 'Willkommen Österreich mit Stermann & Grissemann', + 'description': 'md5:5de034d033a9c27f989343be3bbd4839', + 'media_type': 'episode', + 'timestamp': 1716926584, + 'upload_date': '20240528', + 'release_timestamp': 1716919202, + 'release_date': '20240528', + 'modified_timestamp': 1716968045, + 'modified_date': '20240529', + '_old_archive_ids': ['orftvthek 14228172'], + }, }] - def _extract_video(self, video_id): - encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() - api_json = self._download_json( - f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id) - - if traverse_obj(api_json, 'is_drm_protected'): - self.report_drm(video_id) - + @staticmethod + def _parse_metadata(api_json): + return traverse_obj(api_json, { + 'id': ('id', {int}, {str_or_none}), + 'age_limit': ('age_classification', {parse_age_limit}), + 'duration': ('exact_duration', {functools.partial(float_or_none, scale=1000)}), + 'title': (('title', 'headline'), {str}), + 'description': (('description', 
'teaser_text'), {str}), + 'media_type': ('video_type', {str}), + 'thumbnail': ('_embedded', 'image', 'public_urls', 'highlight_teaser', 'url', {url_or_none}), + 'timestamp': (('date', 'episode_date'), {parse_iso8601}), + 'release_timestamp': ('release_date', {parse_iso8601}), + 'modified_timestamp': ('updated_at', {parse_iso8601}), + }, get_all=False) + + def _extract_video_info(self, video_id, api_json): formats, subtitles = [], {} for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)): for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})): @@ -454,24 +529,30 @@ def _extract_video(self, video_id): 'formats': formats, 'subtitles': subtitles, '_old_archive_ids': [make_archive_id('ORFTVthek', video_id)], - **traverse_obj(api_json, { - 'age_limit': ('age_classification', {parse_age_limit}), - 'duration': ('duration_second', {float_or_none}), - 'title': (('title', 'headline'), {str}), - 'description': (('description', 'teaser_text'), {str}), - 'media_type': ('video_type', {str}), - }, get_all=False), + **self._parse_metadata(api_json), } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + video_id, segment_id = self._match_valid_url(url).group('id', 'segment') - return { - 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), - 'description': self._html_search_meta( - ['description', 'og:description', 'twitter:description'], webpage, default=None), - **self._search_json_ld(webpage, video_id, fatal=False), - **self._extract_video(video_id), - } + encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() + api_json = self._download_json( + f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id) + + if traverse_obj(api_json, 'is_drm_protected'): + self.report_drm(video_id) + + segments = traverse_obj(api_json, ('_embedded', 'segments', 
lambda _, v: v['id'])) + selected_segment = traverse_obj(segments, (lambda _, v: str(v['id']) == segment_id, any)) + + # selected_segment will be falsy if input URL did not include a valid segment_id + if selected_segment and not self._yes_playlist(video_id, segment_id, playlist_label='episode', video_label='segment'): + return self._extract_video_info(segment_id, selected_segment) + + # Even some segmented videos have an unsegmented version available in API response root + if not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none})): + return self.playlist_result( + (self._extract_video_info(str(segment['id']), segment) for segment in segments), + video_id, **self._parse_metadata(api_json), multi_video=True) + + return self._extract_video_info(video_id, api_json) From 03334d639d5282cd4107edb32c623ba400262fc4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 30 May 2024 13:53:37 -0500 Subject: [PATCH 044/145] [build] Use `macos-12` image for `yt-dlp_macos` (#10063) Ref: https://github.blog/changelog/2024-05-20-actions-upcoming-changes-to-github-hosted-macos-runners/ Authored by: bashonly --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 55cf3b3a271c..e3896e9c9a3d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -237,7 +237,7 @@ jobs: macos: needs: process if: inputs.macos - runs-on: macos-11 + runs-on: macos-12 steps: - uses: actions/checkout@v4 From 5fdd13006a1c5d78642c8d3c4c7df0448273c2ae Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 30 May 2024 17:34:02 -0500 Subject: [PATCH 045/145] [build] Bump Pyinstaller to `>=6.7.0` for all builds (#10069) Ref: https://github.com/pyinstaller/pyinstaller/issues/8554 Authored by: bashonly, seproDev Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- 
.github/workflows/build.yml | 26 +++++++++++++++++++------- pyproject.toml | 5 ++--- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e3896e9c9a3d..9a1a22e8f54e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -260,11 +260,23 @@ jobs: --pre -d curl_cffi_whls \ -r requirements.txt done + ( # Overwrite x86_64-only libs with fat/universal2 libs or else Pyinstaller will do the opposite + # See https://github.com/yt-dlp/yt-dlp/pull/10069 + cd curl_cffi_whls + mkdir -p curl_cffi/.dylibs + python_libdir=$(python3 -c 'import sys; from pathlib import Path; print(Path(sys.path[1]).parent)') + for dylib in lib{ssl,crypto}.3.dylib; do + cp "${python_libdir}/${dylib}" "curl_cffi/.dylibs/${dylib}" + for wheel in curl_cffi*macos*x86_64.whl; do + zip "${wheel}" "curl_cffi/.dylibs/${dylib}" + done + done + ) python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2 python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2 cd curl_cffi_universal2 - for wheel in *cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done - python3 -m pip install -U --user *cffi*.whl + for wheel in ./*cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done + python3 -m pip install -U --user ./*cffi*.whl - name: Prepare run: | @@ -311,7 +323,7 @@ jobs: # Hack to get the latest patch version. 
Uncomment if needed #brew install python@3.10 #export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 ) - curl https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg -o "python.pkg" + curl "https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg" -o "python.pkg" sudo installer -pkg python.pkg -target / python3 --version - name: Install Requirements @@ -361,7 +373,7 @@ jobs: run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build python devscripts/install_deps.py --include curl-cffi - python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.7.0-py3-none-any.whl" - name: Prepare run: | @@ -421,7 +433,7 @@ jobs: run: | python devscripts/install_deps.py -o --include build python devscripts/install_deps.py - python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl" - name: Prepare run: | @@ -475,8 +487,8 @@ jobs: run: | cd ./artifact/ # make sure SHA sums are also printed to stdout - sha256sum * | tee ../SHA2-256SUMS - sha512sum * | tee ../SHA2-512SUMS + sha256sum -- * | tee ../SHA2-256SUMS + sha512sum -- * | tee ../SHA2-512SUMS - name: Make Update spec run: | diff --git a/pyproject.toml b/pyproject.toml index 96cb368b6d91..b746fbc96426 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ build = [ "build", "hatchling", "pip", - "setuptools>=66.1.0,<70", + "setuptools", "wheel", ] dev = [ @@ -78,8 +78,7 @@ test = [ "pytest~=8.1", ] pyinstaller = [ - "pyinstaller>=6.3; sys_platform!='darwin'", - "pyinstaller==5.13.2; sys_platform=='darwin'", # needed for 
curl_cffi + "pyinstaller>=6.7.0", # for compat with setuptools>=70 ] py2exe = [ "py2exe>=0.12", From 2e5a47da400b645aadbda6afd1156bd89c744f48 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 30 May 2024 18:04:27 -0500 Subject: [PATCH 046/145] [ie/PatreonCampaign] Fix `campaign_id` extraction (#10070) Closes #10013 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 6c441ff34cda..efbface4b4de 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -486,7 +486,8 @@ def _real_extract(self, url): campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT}) - campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID') + campaign_id = self._search_nextjs_data( + webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] params = { 'json-api-use-default-includes': 'false', From db50f19d76c6870a5a13d0cab9287d684fd7449a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 1 Jun 2024 13:57:23 -0500 Subject: [PATCH 047/145] [rh:requests] Bump minimum `requests` version to 2.32.2 (#10079) Closes #10078 Authored by: bashonly --- README.md | 2 +- bundle/py2exe.py | 6 +++--- pyproject.toml | 3 +-- yt_dlp/networking/_requests.py | 9 ++------- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e8cd6d3a06f8..42ffd9b52021 100644 --- a/README.md +++ b/README.md @@ -262,7 +262,7 @@ You can also run `make yt-dlp` instead to compile only the binary without updati ### Standalone Py2Exe Builds (Windows) -While we provide the option to build with [py2exe](https://www.py2exe.org), it is 
recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run. +While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run. If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: diff --git a/bundle/py2exe.py b/bundle/py2exe.py index 2811674925b0..5fbe55e465e3 100755 --- a/bundle/py2exe.py +++ b/bundle/py2exe.py @@ -42,9 +42,9 @@ def main(): # py2exe cannot import Crypto 'Crypto', 'Cryptodome', - # py2exe appears to confuse this with our socks library. - # We don't use pysocks and urllib3.contrib.socks would fail to import if tried. - 'urllib3.contrib.socks' + # requests >=2.32.0 breaks py2exe builds due to certifi dependency + 'requests', + 'urllib3' ], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], # Modules that are only imported dynamically must be added here diff --git a/pyproject.toml b/pyproject.toml index b746fbc96426..da6403ec7cc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "certifi", "mutagen", "pycryptodomex", - "requests>=2.31.0,<3", + "requests>=2.32.2,<3", "urllib3>=1.26.17,<3", "websockets>=12.0", ] @@ -82,7 +82,6 @@ pyinstaller = [ ] py2exe = [ "py2exe>=0.12", - "requests==2.31.*", ] [project.urls] diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 6397a2c0ca92..bf6fa634ddb2 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -21,8 +21,8 @@ if urllib3_version < (1, 26, 17): raise ImportError('Only urllib3 >= 1.26.17 is supported') -if requests.__build__ < 0x023100: - raise ImportError('Only requests >= 2.31.0 is supported') 
+if requests.__build__ < 0x023202: + raise ImportError('Only requests >= 2.32.2 is supported') import requests.adapters import requests.utils @@ -182,14 +182,9 @@ def proxy_manager_for(self, proxy, **proxy_kwargs): return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs) # Skip `requests` internal verification; we use our own SSLContext - # requests 2.31.0+ def cert_verify(*args, **kwargs): pass - # requests 2.31.0-2.32.1 - def _get_connection(self, request, *_, proxies=None, **__): - return self.get_connection(request.url, proxies) - # requests 2.32.2+: Reimplementation without `_urllib3_request_context` def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None): url = urllib3.util.parse_url(request.url).url From add96eb9f84cfffe85682bf2fb85135746994ee8 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Wed, 12 Jun 2024 01:09:58 +0200 Subject: [PATCH 048/145] [cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> --- CONTRIBUTING.md | 2 +- bundle/py2exe.py | 2 +- bundle/pyinstaller.py | 8 +- devscripts/bash-completion.py | 8 +- devscripts/make_changelog.py | 10 +- devscripts/make_readme.py | 12 +- devscripts/set-variant.py | 2 +- devscripts/update-version.py | 2 +- devscripts/zsh-completion.py | 16 +- pyproject.toml | 127 ++- test/conftest.py | 10 +- test/helper.py | 32 +- test/test_InfoExtractor.py | 138 +-- test/test_YoutubeDL.py | 45 +- test/test_aes.py | 12 +- test/test_compat.py | 10 +- test/test_config.py | 2 +- test/test_cookies.py | 132 +-- test/test_download.py | 18 +- test/test_downloader_http.py | 6 +- test/test_http_proxy.py | 4 +- test/test_iqiyi_sdk_interpreter.py | 4 +- test/test_netrc.py | 2 +- test/test_networking.py | 56 +- test/test_networking_utils.py | 12 +- test/test_overwrites.py | 4 +- test/test_plugins.py | 2 +- 
test/test_post_hooks.py | 2 +- test/test_postprocessors.py | 137 +-- test/test_socks.py | 8 +- test/test_subtitles.py | 11 +- test/test_traversal.py | 6 +- test/test_update.py | 8 +- test/test_utils.py | 64 +- test/test_websockets.py | 4 +- test/test_youtube_misc.py | 2 +- test/test_youtube_signature.py | 8 +- yt_dlp/YoutubeDL.py | 306 +++---- yt_dlp/__init__.py | 39 +- yt_dlp/aes.py | 36 +- yt_dlp/cache.py | 4 +- yt_dlp/compat/_legacy.py | 2 +- yt_dlp/compat/functools.py | 2 +- yt_dlp/cookies.py | 55 +- yt_dlp/downloader/common.py | 2 +- yt_dlp/downloader/external.py | 24 +- yt_dlp/downloader/f4m.py | 22 +- yt_dlp/downloader/fragment.py | 8 +- yt_dlp/downloader/hls.py | 9 +- yt_dlp/downloader/http.py | 14 +- yt_dlp/downloader/ism.py | 2 +- yt_dlp/downloader/mhtml.py | 53 +- yt_dlp/downloader/niconico.py | 14 +- yt_dlp/downloader/rtmp.py | 6 +- yt_dlp/downloader/youtube_live_chat.py | 2 +- yt_dlp/extractor/abc.py | 28 +- yt_dlp/extractor/abcnews.py | 2 +- yt_dlp/extractor/abcotvs.py | 5 +- yt_dlp/extractor/abematv.py | 22 +- yt_dlp/extractor/acast.py | 8 +- yt_dlp/extractor/acfun.py | 4 +- yt_dlp/extractor/adn.py | 19 +- yt_dlp/extractor/adobeconnect.py | 10 +- yt_dlp/extractor/adobepass.py | 866 +++++++++--------- yt_dlp/extractor/adobetv.py | 5 +- yt_dlp/extractor/adultswim.py | 6 +- yt_dlp/extractor/aenetworks.py | 34 +- yt_dlp/extractor/aeonco.py | 8 +- yt_dlp/extractor/afreecatv.py | 4 +- yt_dlp/extractor/agora.py | 6 +- yt_dlp/extractor/airtv.py | 4 +- yt_dlp/extractor/aitube.py | 2 +- yt_dlp/extractor/aliexpress.py | 3 +- yt_dlp/extractor/aljazeera.py | 14 +- yt_dlp/extractor/allocine.py | 5 +- yt_dlp/extractor/allstar.py | 26 +- yt_dlp/extractor/alphaporno.py | 2 +- yt_dlp/extractor/alsace20tv.py | 6 +- yt_dlp/extractor/altcensored.py | 2 +- yt_dlp/extractor/alura.py | 16 +- yt_dlp/extractor/amadeustv.py | 2 +- yt_dlp/extractor/amara.py | 10 +- yt_dlp/extractor/amazon.py | 8 +- yt_dlp/extractor/amazonminitv.py | 2 +- yt_dlp/extractor/amcnetworks.py | 12 +- 
yt_dlp/extractor/americastestkitchen.py | 14 +- yt_dlp/extractor/amp.py | 4 +- yt_dlp/extractor/anchorfm.py | 6 +- yt_dlp/extractor/angel.py | 10 +- yt_dlp/extractor/antenna.py | 2 +- yt_dlp/extractor/anvato.py | 8 +- yt_dlp/extractor/aol.py | 8 +- yt_dlp/extractor/apa.py | 4 +- yt_dlp/extractor/applepodcasts.py | 2 +- yt_dlp/extractor/appletrailers.py | 31 +- yt_dlp/extractor/archiveorg.py | 105 +-- yt_dlp/extractor/arcpublishing.py | 10 +- yt_dlp/extractor/ard.py | 6 +- yt_dlp/extractor/arkena.py | 2 +- yt_dlp/extractor/arnes.py | 12 +- yt_dlp/extractor/art19.py | 2 +- yt_dlp/extractor/arte.py | 24 +- yt_dlp/extractor/atresplayer.py | 4 +- yt_dlp/extractor/atscaleconf.py | 10 +- yt_dlp/extractor/atvat.py | 16 +- yt_dlp/extractor/audimedia.py | 4 +- yt_dlp/extractor/audioboom.py | 4 +- yt_dlp/extractor/audiodraft.py | 13 +- yt_dlp/extractor/audiomack.py | 27 +- yt_dlp/extractor/audius.py | 46 +- yt_dlp/extractor/awaan.py | 27 +- yt_dlp/extractor/aws.py | 24 +- yt_dlp/extractor/azmedien.py | 6 +- yt_dlp/extractor/baidu.py | 7 +- yt_dlp/extractor/banbye.py | 9 +- yt_dlp/extractor/bandcamp.py | 31 +- yt_dlp/extractor/bannedvideo.py | 12 +- yt_dlp/extractor/bbc.py | 110 +-- yt_dlp/extractor/beatport.py | 7 +- yt_dlp/extractor/beeg.py | 8 +- yt_dlp/extractor/behindkink.py | 2 +- yt_dlp/extractor/bellmedia.py | 2 +- yt_dlp/extractor/berufetv.py | 4 +- yt_dlp/extractor/bet.py | 8 +- yt_dlp/extractor/bfmtv.py | 4 +- yt_dlp/extractor/bigflix.py | 14 +- yt_dlp/extractor/bigo.py | 2 +- yt_dlp/extractor/bild.py | 4 +- yt_dlp/extractor/bilibili.py | 138 ++- yt_dlp/extractor/bitchute.py | 14 +- yt_dlp/extractor/blackboardcollaborate.py | 2 +- yt_dlp/extractor/bleacherreport.py | 12 +- yt_dlp/extractor/blerp.py | 25 +- yt_dlp/extractor/blogger.py | 4 +- yt_dlp/extractor/bloomberg.py | 2 +- yt_dlp/extractor/bokecc.py | 15 +- yt_dlp/extractor/bongacams.py | 9 +- yt_dlp/extractor/bostonglobe.py | 3 +- yt_dlp/extractor/box.py | 6 +- yt_dlp/extractor/boxcast.py | 10 +- 
yt_dlp/extractor/br.py | 8 +- yt_dlp/extractor/brainpop.py | 14 +- yt_dlp/extractor/bravotv.py | 2 +- yt_dlp/extractor/breitbart.py | 4 +- yt_dlp/extractor/brightcove.py | 74 +- yt_dlp/extractor/bundesliga.py | 10 +- yt_dlp/extractor/businessinsider.py | 4 +- yt_dlp/extractor/buzzfeed.py | 6 +- yt_dlp/extractor/byutv.py | 2 +- yt_dlp/extractor/c56.py | 4 +- yt_dlp/extractor/callin.py | 16 +- yt_dlp/extractor/caltrans.py | 2 +- yt_dlp/extractor/cam4.py | 4 +- yt_dlp/extractor/camdemy.py | 33 +- yt_dlp/extractor/camfm.py | 4 +- yt_dlp/extractor/cammodels.py | 8 +- yt_dlp/extractor/camtasia.py | 6 +- yt_dlp/extractor/canalalpha.py | 8 +- yt_dlp/extractor/canalc2.py | 2 +- yt_dlp/extractor/canalplus.py | 5 +- yt_dlp/extractor/caracoltv.py | 4 +- yt_dlp/extractor/cartoonnetwork.py | 2 +- yt_dlp/extractor/cbc.py | 51 +- yt_dlp/extractor/cbs.py | 6 +- yt_dlp/extractor/ccc.py | 6 +- yt_dlp/extractor/ccma.py | 6 +- yt_dlp/extractor/cctv.py | 7 +- yt_dlp/extractor/cda.py | 29 +- yt_dlp/extractor/cellebrite.py | 4 +- yt_dlp/extractor/ceskatelevize.py | 24 +- yt_dlp/extractor/cgtn.py | 10 +- yt_dlp/extractor/chaturbate.py | 6 +- yt_dlp/extractor/cinemax.py | 2 +- yt_dlp/extractor/cinetecamilano.py | 8 +- yt_dlp/extractor/cineverse.py | 10 +- yt_dlp/extractor/ciscolive.py | 4 +- yt_dlp/extractor/ciscowebex.py | 4 +- yt_dlp/extractor/cjsw.py | 2 +- yt_dlp/extractor/clippit.py | 4 +- yt_dlp/extractor/cliprs.py | 2 +- yt_dlp/extractor/closertotruth.py | 10 +- yt_dlp/extractor/cloudflarestream.py | 2 +- yt_dlp/extractor/cloudycdn.py | 6 +- yt_dlp/extractor/clubic.py | 4 +- yt_dlp/extractor/clyp.py | 6 +- yt_dlp/extractor/cmt.py | 4 +- yt_dlp/extractor/cnn.py | 8 +- yt_dlp/extractor/common.py | 134 ++- yt_dlp/extractor/commonmistakes.py | 6 +- yt_dlp/extractor/commonprotocols.py | 2 +- yt_dlp/extractor/condenast.py | 33 +- yt_dlp/extractor/contv.py | 2 +- yt_dlp/extractor/corus.py | 12 +- yt_dlp/extractor/coub.py | 8 +- yt_dlp/extractor/cozytv.py | 10 +- yt_dlp/extractor/cpac.py | 
24 +- yt_dlp/extractor/cracked.py | 4 +- yt_dlp/extractor/crackle.py | 14 +- yt_dlp/extractor/craftsy.py | 2 +- yt_dlp/extractor/crooksandliars.py | 4 +- yt_dlp/extractor/crowdbunker.py | 28 +- yt_dlp/extractor/crtvg.py | 4 +- yt_dlp/extractor/crunchyroll.py | 6 +- yt_dlp/extractor/cspan.py | 26 +- yt_dlp/extractor/ctsnews.py | 4 +- yt_dlp/extractor/ctv.py | 4 +- yt_dlp/extractor/ctvnews.py | 6 +- yt_dlp/extractor/cultureunplugged.py | 8 +- yt_dlp/extractor/curiositystream.py | 9 +- yt_dlp/extractor/cwtv.py | 4 +- yt_dlp/extractor/cybrary.py | 20 +- yt_dlp/extractor/dailymail.py | 9 +- yt_dlp/extractor/dailymotion.py | 16 +- yt_dlp/extractor/dailywire.py | 6 +- yt_dlp/extractor/damtomo.py | 9 +- yt_dlp/extractor/daum.py | 28 +- yt_dlp/extractor/dbtv.py | 2 +- yt_dlp/extractor/dctp.py | 11 +- yt_dlp/extractor/deezer.py | 6 +- yt_dlp/extractor/democracynow.py | 8 +- yt_dlp/extractor/detik.py | 20 +- yt_dlp/extractor/deuxm.py | 16 +- yt_dlp/extractor/dfb.py | 4 +- yt_dlp/extractor/digitalconcerthall.py | 8 +- yt_dlp/extractor/digiteka.py | 2 +- yt_dlp/extractor/discovery.py | 10 +- yt_dlp/extractor/discoverygo.py | 5 +- yt_dlp/extractor/disney.py | 8 +- yt_dlp/extractor/dispeak.py | 10 +- yt_dlp/extractor/dlf.py | 36 +- yt_dlp/extractor/dlive.py | 8 +- yt_dlp/extractor/douyutv.py | 10 +- yt_dlp/extractor/dplay.py | 16 +- yt_dlp/extractor/drbonanza.py | 2 +- yt_dlp/extractor/dreisat.py | 4 +- yt_dlp/extractor/drooble.py | 6 +- yt_dlp/extractor/dropbox.py | 14 +- yt_dlp/extractor/dropout.py | 34 +- yt_dlp/extractor/drtuber.py | 10 +- yt_dlp/extractor/drtv.py | 18 +- yt_dlp/extractor/dtube.py | 6 +- yt_dlp/extractor/duboku.py | 29 +- yt_dlp/extractor/dumpert.py | 4 +- yt_dlp/extractor/dvtv.py | 14 +- yt_dlp/extractor/dw.py | 15 +- yt_dlp/extractor/eagleplatform.py | 20 +- yt_dlp/extractor/ebaumsworld.py | 2 +- yt_dlp/extractor/ebay.py | 4 +- yt_dlp/extractor/egghead.py | 11 +- yt_dlp/extractor/eighttracks.py | 49 +- yt_dlp/extractor/eitb.py | 8 +- 
yt_dlp/extractor/elpais.py | 4 +- yt_dlp/extractor/eltrecetv.py | 4 +- yt_dlp/extractor/epicon.py | 29 +- yt_dlp/extractor/epoch.py | 10 +- yt_dlp/extractor/eporner.py | 14 +- yt_dlp/extractor/erocast.py | 2 +- yt_dlp/extractor/eroprofile.py | 6 +- yt_dlp/extractor/err.py | 2 +- yt_dlp/extractor/ertgr.py | 17 +- yt_dlp/extractor/espn.py | 41 +- yt_dlp/extractor/ettutv.py | 2 +- yt_dlp/extractor/europa.py | 24 +- yt_dlp/extractor/europeantour.py | 8 +- yt_dlp/extractor/eurosport.py | 10 +- yt_dlp/extractor/euscreen.py | 18 +- yt_dlp/extractor/expressen.py | 2 +- yt_dlp/extractor/eyedotv.py | 12 +- yt_dlp/extractor/facebook.py | 49 +- yt_dlp/extractor/fancode.py | 35 +- yt_dlp/extractor/fc2.py | 18 +- yt_dlp/extractor/filmon.py | 11 +- yt_dlp/extractor/filmweb.py | 2 +- yt_dlp/extractor/firsttv.py | 27 +- yt_dlp/extractor/flickr.py | 14 +- yt_dlp/extractor/floatplane.py | 2 +- yt_dlp/extractor/folketinget.py | 5 +- yt_dlp/extractor/footyroom.py | 2 +- yt_dlp/extractor/fourtube.py | 41 +- yt_dlp/extractor/fox.py | 15 +- yt_dlp/extractor/fptplay.py | 2 +- yt_dlp/extractor/francetv.py | 6 +- yt_dlp/extractor/freesound.py | 2 +- yt_dlp/extractor/freetv.py | 10 +- yt_dlp/extractor/frontendmasters.py | 31 +- yt_dlp/extractor/fujitv.py | 8 +- yt_dlp/extractor/funimation.py | 32 +- yt_dlp/extractor/funker530.py | 6 +- yt_dlp/extractor/fuyintv.py | 2 +- yt_dlp/extractor/gab.py | 22 +- yt_dlp/extractor/gaia.py | 14 +- yt_dlp/extractor/gamejolt.py | 34 +- yt_dlp/extractor/gamespot.py | 5 +- yt_dlp/extractor/gamestar.py | 6 +- yt_dlp/extractor/gaskrank.py | 4 +- yt_dlp/extractor/gazeta.py | 4 +- yt_dlp/extractor/gbnews.py | 14 +- yt_dlp/extractor/gdcvault.py | 10 +- yt_dlp/extractor/gedidigital.py | 4 +- yt_dlp/extractor/generic.py | 145 ++- yt_dlp/extractor/genericembeds.py | 10 +- yt_dlp/extractor/getcourseru.py | 22 +- yt_dlp/extractor/gettr.py | 14 +- yt_dlp/extractor/giantbomb.py | 2 +- yt_dlp/extractor/gigya.py | 2 +- yt_dlp/extractor/glide.py | 2 +- 
yt_dlp/extractor/globalplayer.py | 4 +- yt_dlp/extractor/globo.py | 29 +- yt_dlp/extractor/glomex.py | 6 +- yt_dlp/extractor/gmanetwork.py | 4 +- yt_dlp/extractor/go.py | 19 +- yt_dlp/extractor/godresource.py | 10 +- yt_dlp/extractor/godtube.py | 4 +- yt_dlp/extractor/gofile.py | 8 +- yt_dlp/extractor/golem.py | 16 +- yt_dlp/extractor/googledrive.py | 20 +- yt_dlp/extractor/googlepodcasts.py | 2 +- yt_dlp/extractor/goplay.py | 143 ++- yt_dlp/extractor/gopro.py | 8 +- yt_dlp/extractor/goshgay.py | 9 +- yt_dlp/extractor/gotostage.py | 19 +- yt_dlp/extractor/gputechconf.py | 4 +- yt_dlp/extractor/gronkh.py | 14 +- yt_dlp/extractor/groupon.py | 3 +- yt_dlp/extractor/harpodeon.py | 6 +- yt_dlp/extractor/hbo.py | 8 +- yt_dlp/extractor/heise.py | 6 +- yt_dlp/extractor/hidive.py | 6 +- yt_dlp/extractor/historicfilms.py | 2 +- yt_dlp/extractor/hitrecord.py | 11 +- yt_dlp/extractor/hketv.py | 3 +- yt_dlp/extractor/hollywoodreporter.py | 2 +- yt_dlp/extractor/holodex.py | 2 +- yt_dlp/extractor/hotnewhiphop.py | 9 +- yt_dlp/extractor/hotstar.py | 11 +- yt_dlp/extractor/hrfensehen.py | 12 +- yt_dlp/extractor/hrti.py | 20 +- yt_dlp/extractor/hse.py | 4 +- yt_dlp/extractor/huajiao.py | 2 +- yt_dlp/extractor/huffpost.py | 2 +- yt_dlp/extractor/hungama.py | 6 +- yt_dlp/extractor/huya.py | 13 +- yt_dlp/extractor/hypem.py | 6 +- yt_dlp/extractor/hypergryph.py | 4 +- yt_dlp/extractor/hytale.py | 4 +- yt_dlp/extractor/icareus.py | 12 +- yt_dlp/extractor/ichinanalive.py | 15 +- yt_dlp/extractor/ign.py | 21 +- yt_dlp/extractor/iheart.py | 2 +- yt_dlp/extractor/ilpost.py | 2 +- yt_dlp/extractor/iltalehti.py | 2 +- yt_dlp/extractor/imdb.py | 8 +- yt_dlp/extractor/imggaming.py | 2 +- yt_dlp/extractor/imgur.py | 21 +- yt_dlp/extractor/ina.py | 2 +- yt_dlp/extractor/inc.py | 2 +- yt_dlp/extractor/indavideo.py | 4 +- yt_dlp/extractor/infoq.py | 12 +- yt_dlp/extractor/instagram.py | 54 +- yt_dlp/extractor/internazionale.py | 4 +- yt_dlp/extractor/iprima.py | 4 +- yt_dlp/extractor/iqiyi.py | 77 
+- yt_dlp/extractor/islamchannel.py | 2 +- yt_dlp/extractor/israelnationalnews.py | 6 +- yt_dlp/extractor/itprotv.py | 14 +- yt_dlp/extractor/itv.py | 33 +- yt_dlp/extractor/ivi.py | 26 +- yt_dlp/extractor/ivideon.py | 16 +- yt_dlp/extractor/iwara.py | 8 +- yt_dlp/extractor/ixigua.py | 2 +- yt_dlp/extractor/izlesene.py | 18 +- yt_dlp/extractor/jamendo.py | 32 +- yt_dlp/extractor/japandiet.py | 8 +- yt_dlp/extractor/jiocinema.py | 14 +- yt_dlp/extractor/jiosaavn.py | 2 +- yt_dlp/extractor/joj.py | 15 +- yt_dlp/extractor/jove.py | 4 +- yt_dlp/extractor/jwplatform.py | 4 +- yt_dlp/extractor/kakao.py | 10 +- yt_dlp/extractor/kaltura.py | 73 +- yt_dlp/extractor/kankanews.py | 2 +- yt_dlp/extractor/karaoketv.py | 4 +- yt_dlp/extractor/kelbyone.py | 2 +- yt_dlp/extractor/kicker.py | 6 +- yt_dlp/extractor/kinja.py | 18 +- yt_dlp/extractor/kommunetv.py | 10 +- yt_dlp/extractor/kompas.py | 2 +- yt_dlp/extractor/koo.py | 27 +- yt_dlp/extractor/kth.py | 7 +- yt_dlp/extractor/ku6.py | 10 +- yt_dlp/extractor/kuwo.py | 42 +- yt_dlp/extractor/la7.py | 2 +- yt_dlp/extractor/laxarxames.py | 2 +- yt_dlp/extractor/lbry.py | 16 +- yt_dlp/extractor/lcp.py | 2 +- yt_dlp/extractor/lecture2go.py | 2 +- yt_dlp/extractor/lecturio.py | 6 +- yt_dlp/extractor/leeco.py | 41 +- yt_dlp/extractor/lego.py | 6 +- yt_dlp/extractor/lenta.py | 2 +- yt_dlp/extractor/libraryofcongress.py | 2 +- yt_dlp/extractor/libsyn.py | 6 +- yt_dlp/extractor/lifenews.py | 27 +- yt_dlp/extractor/likee.py | 2 +- yt_dlp/extractor/limelight.py | 16 +- yt_dlp/extractor/linkedin.py | 27 +- yt_dlp/extractor/liputan6.py | 6 +- yt_dlp/extractor/listennotes.py | 6 +- yt_dlp/extractor/litv.py | 6 +- yt_dlp/extractor/livejournal.py | 5 +- yt_dlp/extractor/livestream.py | 42 +- yt_dlp/extractor/livestreamfails.py | 4 +- yt_dlp/extractor/lnkgo.py | 27 +- yt_dlp/extractor/lovehomeporn.py | 6 +- yt_dlp/extractor/lrt.py | 8 +- yt_dlp/extractor/lsm.py | 12 +- yt_dlp/extractor/lumni.py | 2 +- yt_dlp/extractor/lynda.py | 54 +- 
yt_dlp/extractor/magentamusik.py | 2 +- yt_dlp/extractor/mailru.py | 15 +- yt_dlp/extractor/mainstreaming.py | 28 +- yt_dlp/extractor/mangomolo.py | 13 +- yt_dlp/extractor/manoto.py | 12 +- yt_dlp/extractor/manyvids.py | 8 +- yt_dlp/extractor/markiza.py | 9 +- yt_dlp/extractor/massengeschmacktv.py | 2 +- yt_dlp/extractor/masters.py | 2 +- yt_dlp/extractor/mdr.py | 7 +- yt_dlp/extractor/medaltv.py | 13 +- yt_dlp/extractor/mediaite.py | 14 +- yt_dlp/extractor/mediaklikk.py | 39 +- yt_dlp/extractor/mediaset.py | 6 +- yt_dlp/extractor/mediasite.py | 87 +- yt_dlp/extractor/mediaworksnz.py | 10 +- yt_dlp/extractor/meipai.py | 4 +- yt_dlp/extractor/melonvod.py | 4 +- yt_dlp/extractor/metacritic.py | 6 +- yt_dlp/extractor/mgtv.py | 6 +- yt_dlp/extractor/microsoftembed.py | 4 +- yt_dlp/extractor/microsoftstream.py | 6 +- yt_dlp/extractor/microsoftvirtualacademy.py | 21 +- yt_dlp/extractor/mildom.py | 10 +- yt_dlp/extractor/minds.py | 11 +- yt_dlp/extractor/minoto.py | 2 +- yt_dlp/extractor/mirrativ.py | 6 +- yt_dlp/extractor/mit.py | 8 +- yt_dlp/extractor/mixch.py | 4 +- yt_dlp/extractor/mixcloud.py | 37 +- yt_dlp/extractor/mlb.py | 16 +- yt_dlp/extractor/mlssoccer.py | 69 +- yt_dlp/extractor/mocha.py | 4 +- yt_dlp/extractor/mojvideo.py | 6 +- yt_dlp/extractor/monstercat.py | 6 +- yt_dlp/extractor/motherless.py | 6 +- yt_dlp/extractor/motorsport.py | 11 +- yt_dlp/extractor/moview.py | 6 +- yt_dlp/extractor/moviezine.py | 2 +- yt_dlp/extractor/movingimage.py | 2 +- yt_dlp/extractor/msn.py | 5 +- yt_dlp/extractor/mtv.py | 50 +- yt_dlp/extractor/muenchentv.py | 8 +- yt_dlp/extractor/murrtube.py | 6 +- yt_dlp/extractor/musescore.py | 12 +- yt_dlp/extractor/musicdex.py | 50 +- yt_dlp/extractor/mx3.py | 10 +- yt_dlp/extractor/mxplayer.py | 25 +- yt_dlp/extractor/myspace.py | 14 +- yt_dlp/extractor/myspass.py | 3 +- yt_dlp/extractor/mzaalo.py | 6 +- yt_dlp/extractor/n1.py | 6 +- yt_dlp/extractor/nate.py | 24 +- yt_dlp/extractor/nationalgeographic.py | 2 +- 
yt_dlp/extractor/naver.py | 12 +- yt_dlp/extractor/nba.py | 23 +- yt_dlp/extractor/nbc.py | 26 +- yt_dlp/extractor/ndr.py | 18 +- yt_dlp/extractor/ndtv.py | 26 +- yt_dlp/extractor/nekohacker.py | 32 +- yt_dlp/extractor/neteasemusic.py | 10 +- yt_dlp/extractor/netverse.py | 14 +- yt_dlp/extractor/netzkino.py | 6 +- yt_dlp/extractor/newgrounds.py | 10 +- yt_dlp/extractor/newsy.py | 4 +- yt_dlp/extractor/nextmedia.py | 17 +- yt_dlp/extractor/nexx.py | 92 +- yt_dlp/extractor/nfhsnetwork.py | 52 +- yt_dlp/extractor/nfl.py | 4 +- yt_dlp/extractor/nhk.py | 24 +- yt_dlp/extractor/nhl.py | 9 +- yt_dlp/extractor/nick.py | 18 +- yt_dlp/extractor/niconico.py | 74 +- yt_dlp/extractor/niconicochannelplus.py | 4 +- yt_dlp/extractor/ninaprotocol.py | 10 +- yt_dlp/extractor/ninecninemedia.py | 10 +- yt_dlp/extractor/ninegag.py | 6 +- yt_dlp/extractor/ninenews.py | 4 +- yt_dlp/extractor/ninenow.py | 21 +- yt_dlp/extractor/nintendo.py | 2 +- yt_dlp/extractor/nitter.py | 18 +- yt_dlp/extractor/nobelprize.py | 2 +- yt_dlp/extractor/noice.py | 6 +- yt_dlp/extractor/nonktube.py | 2 +- yt_dlp/extractor/noodlemagazine.py | 6 +- yt_dlp/extractor/noovo.py | 7 +- yt_dlp/extractor/nosnl.py | 6 +- yt_dlp/extractor/nova.py | 8 +- yt_dlp/extractor/novaplay.py | 4 +- yt_dlp/extractor/nowness.py | 7 +- yt_dlp/extractor/noz.py | 9 +- yt_dlp/extractor/npo.py | 40 +- yt_dlp/extractor/npr.py | 4 +- yt_dlp/extractor/nrk.py | 72 +- yt_dlp/extractor/ntvru.py | 6 +- yt_dlp/extractor/nubilesporn.py | 6 +- yt_dlp/extractor/nuevo.py | 2 +- yt_dlp/extractor/nuvid.py | 8 +- yt_dlp/extractor/nytimes.py | 2 +- yt_dlp/extractor/nzherald.py | 21 +- yt_dlp/extractor/nzonscreen.py | 2 +- yt_dlp/extractor/odkmedia.py | 4 +- yt_dlp/extractor/odnoklassniki.py | 19 +- yt_dlp/extractor/oftv.py | 8 +- yt_dlp/extractor/oktoberfesttv.py | 2 +- yt_dlp/extractor/olympics.py | 8 +- yt_dlp/extractor/on24.py | 6 +- yt_dlp/extractor/onefootball.py | 2 +- yt_dlp/extractor/onenewsnz.py | 10 +- yt_dlp/extractor/oneplace.py | 4 +- 
yt_dlp/extractor/onet.py | 6 +- yt_dlp/extractor/onionstudios.py | 3 +- yt_dlp/extractor/opencast.py | 2 +- yt_dlp/extractor/openload.py | 10 +- yt_dlp/extractor/openrec.py | 7 +- yt_dlp/extractor/ora.py | 8 +- yt_dlp/extractor/orf.py | 18 +- yt_dlp/extractor/outsidetv.py | 2 +- yt_dlp/extractor/packtpub.py | 9 +- yt_dlp/extractor/palcomp3.py | 11 +- yt_dlp/extractor/panopto.py | 66 +- yt_dlp/extractor/paramountplus.py | 8 +- yt_dlp/extractor/parler.py | 2 +- yt_dlp/extractor/parlview.py | 7 +- yt_dlp/extractor/patreon.py | 22 +- yt_dlp/extractor/pbs.py | 37 +- yt_dlp/extractor/pearvideo.py | 4 +- yt_dlp/extractor/peertube.py | 71 +- yt_dlp/extractor/peertv.py | 2 +- yt_dlp/extractor/peloton.py | 26 +- yt_dlp/extractor/performgroup.py | 6 +- yt_dlp/extractor/periscope.py | 12 +- yt_dlp/extractor/philharmoniedeparis.py | 7 +- yt_dlp/extractor/phoenix.py | 9 +- yt_dlp/extractor/photobucket.py | 6 +- yt_dlp/extractor/piapro.py | 15 +- yt_dlp/extractor/picarto.py | 14 +- yt_dlp/extractor/piksel.py | 8 +- yt_dlp/extractor/pinkbike.py | 8 +- yt_dlp/extractor/pinterest.py | 17 +- yt_dlp/extractor/pixivsketch.py | 4 +- yt_dlp/extractor/pladform.py | 12 +- yt_dlp/extractor/planetmarathi.py | 15 +- yt_dlp/extractor/platzi.py | 22 +- yt_dlp/extractor/playsuisse.py | 20 +- yt_dlp/extractor/playtvak.py | 24 +- yt_dlp/extractor/playwire.py | 2 +- yt_dlp/extractor/pluralsight.py | 69 +- yt_dlp/extractor/plutotv.py | 25 +- yt_dlp/extractor/podchaser.py | 14 +- yt_dlp/extractor/podomatic.py | 11 +- yt_dlp/extractor/pokemon.py | 14 +- yt_dlp/extractor/pokergo.py | 28 +- yt_dlp/extractor/polsatgo.py | 6 +- yt_dlp/extractor/polskieradio.py | 17 +- yt_dlp/extractor/popcorntimes.py | 5 +- yt_dlp/extractor/popcorntv.py | 2 +- yt_dlp/extractor/pornbox.py | 12 +- yt_dlp/extractor/pornflip.py | 2 +- yt_dlp/extractor/pornhub.py | 66 +- yt_dlp/extractor/pornotube.py | 11 +- yt_dlp/extractor/pornovoisines.py | 6 +- yt_dlp/extractor/pornoxo.py | 2 +- yt_dlp/extractor/pr0gramm.py | 2 +- 
yt_dlp/extractor/prankcast.py | 24 +- yt_dlp/extractor/premiershiprugby.py | 2 +- yt_dlp/extractor/presstv.py | 10 +- yt_dlp/extractor/projectveritas.py | 10 +- yt_dlp/extractor/prosiebensat1.py | 19 +- yt_dlp/extractor/prx.py | 68 +- yt_dlp/extractor/puhutv.py | 41 +- yt_dlp/extractor/puls4.py | 3 +- yt_dlp/extractor/pyvideo.py | 7 +- yt_dlp/extractor/qingting.py | 4 +- yt_dlp/extractor/qqmusic.py | 33 +- yt_dlp/extractor/r7.py | 6 +- yt_dlp/extractor/radiko.py | 8 +- yt_dlp/extractor/radiocanada.py | 8 +- yt_dlp/extractor/radiocomercial.py | 14 +- yt_dlp/extractor/radiode.py | 4 +- yt_dlp/extractor/radiofrance.py | 4 +- yt_dlp/extractor/radiojavan.py | 2 +- yt_dlp/extractor/radiokapital.py | 4 +- yt_dlp/extractor/radiozet.py | 2 +- yt_dlp/extractor/radlive.py | 10 +- yt_dlp/extractor/rai.py | 20 +- yt_dlp/extractor/raywenderlich.py | 16 +- yt_dlp/extractor/rbgtum.py | 10 +- yt_dlp/extractor/rcs.py | 30 +- yt_dlp/extractor/rcti.py | 30 +- yt_dlp/extractor/rds.py | 7 +- yt_dlp/extractor/redbee.py | 30 +- yt_dlp/extractor/redbulltv.py | 19 +- yt_dlp/extractor/redge.py | 4 +- yt_dlp/extractor/redgifs.py | 30 +- yt_dlp/extractor/redtube.py | 4 +- yt_dlp/extractor/rentv.py | 7 +- yt_dlp/extractor/restudy.py | 4 +- yt_dlp/extractor/reuters.py | 8 +- yt_dlp/extractor/reverbnation.py | 6 +- yt_dlp/extractor/ridehome.py | 4 +- yt_dlp/extractor/rinsefm.py | 14 +- yt_dlp/extractor/rmcdecouverte.py | 8 +- yt_dlp/extractor/rockstargames.py | 2 +- yt_dlp/extractor/rokfin.py | 18 +- yt_dlp/extractor/roosterteeth.py | 6 +- yt_dlp/extractor/rottentomatoes.py | 4 +- yt_dlp/extractor/rozhlas.py | 16 +- yt_dlp/extractor/rte.py | 2 +- yt_dlp/extractor/rtl2.py | 4 +- yt_dlp/extractor/rtlnl.py | 26 +- yt_dlp/extractor/rtnews.py | 60 +- yt_dlp/extractor/rtp.py | 2 +- yt_dlp/extractor/rtrfm.py | 4 +- yt_dlp/extractor/rts.py | 11 +- yt_dlp/extractor/rtvcplay.py | 6 +- yt_dlp/extractor/rtve.py | 20 +- yt_dlp/extractor/rtvs.py | 8 +- yt_dlp/extractor/rtvslo.py | 6 +- 
yt_dlp/extractor/rule34video.py | 8 +- yt_dlp/extractor/rumble.py | 26 +- yt_dlp/extractor/rutube.py | 15 +- yt_dlp/extractor/rutv.py | 10 +- yt_dlp/extractor/ruutu.py | 16 +- yt_dlp/extractor/ruv.py | 8 +- yt_dlp/extractor/s4c.py | 6 +- yt_dlp/extractor/safari.py | 29 +- yt_dlp/extractor/saitosan.py | 8 +- yt_dlp/extractor/samplefocus.py | 8 +- yt_dlp/extractor/sapo.py | 2 +- yt_dlp/extractor/sbscokr.py | 4 +- yt_dlp/extractor/screencast.py | 15 +- yt_dlp/extractor/screencastomatic.py | 2 +- yt_dlp/extractor/scrippsnetworks.py | 12 +- yt_dlp/extractor/scrolller.py | 14 +- yt_dlp/extractor/scte.py | 6 +- yt_dlp/extractor/senategov.py | 19 +- yt_dlp/extractor/sendtonews.py | 6 +- yt_dlp/extractor/servus.py | 2 +- yt_dlp/extractor/sevenplus.py | 7 +- yt_dlp/extractor/sexu.py | 2 +- yt_dlp/extractor/seznamzpravy.py | 12 +- yt_dlp/extractor/shahid.py | 18 +- yt_dlp/extractor/shemaroome.py | 21 +- yt_dlp/extractor/showroomlive.py | 9 +- yt_dlp/extractor/sibnet.py | 4 +- yt_dlp/extractor/simplecast.py | 6 +- yt_dlp/extractor/sina.py | 7 +- yt_dlp/extractor/sixplay.py | 11 +- yt_dlp/extractor/skeb.py | 16 +- yt_dlp/extractor/sky.py | 2 +- yt_dlp/extractor/skyit.py | 10 +- yt_dlp/extractor/skylinewebcams.py | 2 +- yt_dlp/extractor/skynewsarabia.py | 11 +- yt_dlp/extractor/skynewsau.py | 12 +- yt_dlp/extractor/slideshare.py | 8 +- yt_dlp/extractor/slideslive.py | 2 +- yt_dlp/extractor/slutload.py | 12 +- yt_dlp/extractor/snotr.py | 2 +- yt_dlp/extractor/sohu.py | 48 +- yt_dlp/extractor/sonyliv.py | 6 +- yt_dlp/extractor/soundcloud.py | 54 +- yt_dlp/extractor/soundgasm.py | 4 +- yt_dlp/extractor/southpark.py | 4 +- yt_dlp/extractor/spankbang.py | 11 +- yt_dlp/extractor/spiegel.py | 4 +- yt_dlp/extractor/sport5.py | 6 +- yt_dlp/extractor/sportdeutschland.py | 12 +- yt_dlp/extractor/spotify.py | 6 +- yt_dlp/extractor/spreaker.py | 23 +- yt_dlp/extractor/springboardplatform.py | 5 +- yt_dlp/extractor/srgssr.py | 15 +- yt_dlp/extractor/srmediathek.py | 2 +- 
yt_dlp/extractor/stageplus.py | 2 +- yt_dlp/extractor/stanfordoc.py | 18 +- yt_dlp/extractor/startrek.py | 4 +- yt_dlp/extractor/startv.py | 31 +- yt_dlp/extractor/steam.py | 24 +- yt_dlp/extractor/stitcher.py | 5 +- yt_dlp/extractor/storyfire.py | 8 +- yt_dlp/extractor/streamable.py | 10 +- yt_dlp/extractor/streamcz.py | 14 +- yt_dlp/extractor/streetvoice.py | 8 +- yt_dlp/extractor/stretchinternet.py | 2 +- yt_dlp/extractor/stripchat.py | 2 +- yt_dlp/extractor/stv.py | 9 +- yt_dlp/extractor/substack.py | 8 +- yt_dlp/extractor/sunporno.py | 4 +- yt_dlp/extractor/sverigesradio.py | 2 +- yt_dlp/extractor/svt.py | 43 +- yt_dlp/extractor/swearnet.py | 8 +- yt_dlp/extractor/syfy.py | 4 +- yt_dlp/extractor/syvdk.py | 4 +- yt_dlp/extractor/tagesschau.py | 4 +- yt_dlp/extractor/taptap.py | 24 +- yt_dlp/extractor/tbs.py | 19 +- yt_dlp/extractor/tbsjp.py | 6 +- yt_dlp/extractor/teachable.py | 35 +- yt_dlp/extractor/teachertube.py | 12 +- yt_dlp/extractor/ted.py | 22 +- yt_dlp/extractor/tele13.py | 2 +- yt_dlp/extractor/telecaribe.py | 2 +- yt_dlp/extractor/telecinco.py | 2 +- yt_dlp/extractor/telegraaf.py | 8 +- yt_dlp/extractor/telegram.py | 2 +- yt_dlp/extractor/telemb.py | 6 +- yt_dlp/extractor/telemundo.py | 4 +- yt_dlp/extractor/telequebec.py | 7 +- yt_dlp/extractor/teletask.py | 8 +- yt_dlp/extractor/telewebion.py | 2 +- yt_dlp/extractor/tempo.py | 18 +- yt_dlp/extractor/tencent.py | 2 +- yt_dlp/extractor/tennistv.py | 14 +- yt_dlp/extractor/tenplay.py | 6 +- yt_dlp/extractor/testurl.py | 2 +- yt_dlp/extractor/tf1.py | 4 +- yt_dlp/extractor/tfo.py | 4 +- yt_dlp/extractor/theguardian.py | 31 +- yt_dlp/extractor/theholetv.py | 6 +- yt_dlp/extractor/theintercept.py | 7 +- yt_dlp/extractor/theplatform.py | 32 +- yt_dlp/extractor/thestar.py | 2 +- yt_dlp/extractor/theweatherchannel.py | 8 +- yt_dlp/extractor/thisamericanlife.py | 4 +- yt_dlp/extractor/thisvid.py | 4 +- yt_dlp/extractor/threeqsdn.py | 6 +- yt_dlp/extractor/threespeak.py | 28 +- yt_dlp/extractor/tiktok.py | 
42 +- yt_dlp/extractor/tmz.py | 6 +- yt_dlp/extractor/tnaflix.py | 13 +- yt_dlp/extractor/toggle.py | 24 +- yt_dlp/extractor/tonline.py | 4 +- yt_dlp/extractor/toongoggles.py | 2 +- yt_dlp/extractor/toutv.py | 2 +- yt_dlp/extractor/toypics.py | 8 +- yt_dlp/extractor/traileraddict.py | 4 +- yt_dlp/extractor/trovo.py | 4 +- yt_dlp/extractor/trtcocuk.py | 6 +- yt_dlp/extractor/trtworld.py | 10 +- yt_dlp/extractor/trueid.py | 6 +- yt_dlp/extractor/trutv.py | 2 +- yt_dlp/extractor/tube8.py | 10 +- yt_dlp/extractor/tubetugraz.py | 69 +- yt_dlp/extractor/tubitv.py | 8 +- yt_dlp/extractor/tumblr.py | 14 +- yt_dlp/extractor/tunein.py | 4 +- yt_dlp/extractor/turner.py | 9 +- yt_dlp/extractor/tv2.py | 18 +- yt_dlp/extractor/tv24ua.py | 6 +- yt_dlp/extractor/tv2dk.py | 2 +- yt_dlp/extractor/tv2hu.py | 19 +- yt_dlp/extractor/tv4.py | 2 +- yt_dlp/extractor/tv5unis.py | 12 +- yt_dlp/extractor/tvanouvelles.py | 4 +- yt_dlp/extractor/tvc.py | 2 +- yt_dlp/extractor/tver.py | 2 +- yt_dlp/extractor/tvigle.py | 8 +- yt_dlp/extractor/tviplayer.py | 8 +- yt_dlp/extractor/tvn24.py | 4 +- yt_dlp/extractor/tvnoe.py | 4 +- yt_dlp/extractor/tvp.py | 13 +- yt_dlp/extractor/tvplay.py | 14 +- yt_dlp/extractor/tvplayer.py | 7 +- yt_dlp/extractor/tweakers.py | 4 +- yt_dlp/extractor/twentymin.py | 6 +- yt_dlp/extractor/twentythreevideo.py | 4 +- yt_dlp/extractor/twitcasting.py | 14 +- yt_dlp/extractor/twitch.py | 114 ++- yt_dlp/extractor/twitter.py | 78 +- yt_dlp/extractor/txxx.py | 38 +- yt_dlp/extractor/udemy.py | 44 +- yt_dlp/extractor/udn.py | 8 +- yt_dlp/extractor/uktvplay.py | 2 +- yt_dlp/extractor/umg.py | 4 +- yt_dlp/extractor/unistra.py | 8 +- yt_dlp/extractor/unity.py | 2 +- yt_dlp/extractor/uol.py | 16 +- yt_dlp/extractor/urort.py | 12 +- yt_dlp/extractor/urplay.py | 8 +- yt_dlp/extractor/usatoday.py | 7 +- yt_dlp/extractor/ustream.py | 33 +- yt_dlp/extractor/ustudio.py | 12 +- yt_dlp/extractor/utreon.py | 10 +- yt_dlp/extractor/veo.py | 6 +- yt_dlp/extractor/veoh.py | 16 +- 
yt_dlp/extractor/vesti.py | 4 +- yt_dlp/extractor/vevo.py | 43 +- yt_dlp/extractor/vgtv.py | 19 +- yt_dlp/extractor/vh1.py | 2 +- yt_dlp/extractor/vice.py | 16 +- yt_dlp/extractor/viddler.py | 6 +- yt_dlp/extractor/videa.py | 7 +- yt_dlp/extractor/videocampus_sachsen.py | 34 +- yt_dlp/extractor/videofyme.py | 2 +- yt_dlp/extractor/videoken.py | 2 +- yt_dlp/extractor/videomore.py | 13 +- yt_dlp/extractor/videopress.py | 6 +- yt_dlp/extractor/vidio.py | 22 +- yt_dlp/extractor/vidlii.py | 4 +- yt_dlp/extractor/vidly.py | 2 +- yt_dlp/extractor/viewlift.py | 28 +- yt_dlp/extractor/viidea.py | 23 +- yt_dlp/extractor/viki.py | 28 +- yt_dlp/extractor/vimeo.py | 52 +- yt_dlp/extractor/vine.py | 15 +- yt_dlp/extractor/viously.py | 2 +- yt_dlp/extractor/viqeo.py | 2 +- yt_dlp/extractor/viu.py | 39 +- yt_dlp/extractor/vk.py | 10 +- yt_dlp/extractor/vodplatform.py | 2 +- yt_dlp/extractor/voicy.py | 19 +- yt_dlp/extractor/volejtv.py | 4 +- yt_dlp/extractor/voxmedia.py | 9 +- yt_dlp/extractor/vrt.py | 22 +- yt_dlp/extractor/vtm.py | 4 +- yt_dlp/extractor/vuclip.py | 10 +- yt_dlp/extractor/vvvvid.py | 22 +- yt_dlp/extractor/walla.py | 4 +- yt_dlp/extractor/washingtonpost.py | 6 +- yt_dlp/extractor/wat.py | 5 +- yt_dlp/extractor/wdr.py | 29 +- yt_dlp/extractor/webcamerapl.py | 4 +- yt_dlp/extractor/webcaster.py | 2 +- yt_dlp/extractor/webofstories.py | 18 +- yt_dlp/extractor/weibo.py | 10 +- yt_dlp/extractor/wevidi.py | 12 +- yt_dlp/extractor/whowatch.py | 21 +- yt_dlp/extractor/wikimedia.py | 4 +- yt_dlp/extractor/wimtv.py | 26 +- yt_dlp/extractor/wistia.py | 24 +- yt_dlp/extractor/wordpress.py | 14 +- yt_dlp/extractor/worldstarhiphop.py | 4 +- yt_dlp/extractor/wppilot.py | 4 +- yt_dlp/extractor/wsj.py | 6 +- yt_dlp/extractor/wwe.py | 7 +- yt_dlp/extractor/wykop.py | 2 +- yt_dlp/extractor/xanimu.py | 19 +- yt_dlp/extractor/xboxclips.py | 4 +- yt_dlp/extractor/xhamster.py | 27 +- yt_dlp/extractor/xiaohongshu.py | 4 +- yt_dlp/extractor/ximalaya.py | 30 +- 
yt_dlp/extractor/xinpianchang.py | 4 +- yt_dlp/extractor/xminus.py | 2 +- yt_dlp/extractor/xnxx.py | 2 +- yt_dlp/extractor/xstream.py | 5 +- yt_dlp/extractor/xvideos.py | 48 +- yt_dlp/extractor/xxxymovies.py | 2 +- yt_dlp/extractor/yahoo.py | 16 +- yt_dlp/extractor/yandexdisk.py | 4 +- yt_dlp/extractor/yandexmusic.py | 73 +- yt_dlp/extractor/yandexvideo.py | 12 +- yt_dlp/extractor/yapfiles.py | 6 +- yt_dlp/extractor/yappy.py | 12 +- yt_dlp/extractor/yle_areena.py | 10 +- yt_dlp/extractor/youjizz.py | 2 +- yt_dlp/extractor/youku.py | 4 +- yt_dlp/extractor/younow.py | 45 +- yt_dlp/extractor/youporn.py | 6 +- yt_dlp/extractor/youtube.py | 337 ++++--- yt_dlp/extractor/zaiko.py | 2 +- yt_dlp/extractor/zapiks.py | 4 +- yt_dlp/extractor/zattoo.py | 49 +- yt_dlp/extractor/zdf.py | 31 +- yt_dlp/extractor/zee5.py | 33 +- yt_dlp/extractor/zeenews.py | 6 +- yt_dlp/extractor/zenporn.py | 8 +- yt_dlp/extractor/zetland.py | 4 +- yt_dlp/extractor/zhihu.py | 2 +- yt_dlp/extractor/zingmp3.py | 12 +- yt_dlp/extractor/zoom.py | 10 +- yt_dlp/extractor/zype.py | 6 +- yt_dlp/jsinterp.py | 20 +- yt_dlp/networking/__init__.py | 2 +- yt_dlp/networking/_curlcffi.py | 2 +- yt_dlp/networking/_helper.py | 4 +- yt_dlp/networking/_requests.py | 20 +- yt_dlp/networking/_urllib.py | 8 +- yt_dlp/networking/_websockets.py | 6 +- yt_dlp/networking/common.py | 28 +- yt_dlp/networking/exceptions.py | 2 +- yt_dlp/networking/impersonate.py | 6 +- yt_dlp/options.py | 42 +- yt_dlp/postprocessor/__init__.py | 2 +- yt_dlp/postprocessor/common.py | 6 +- yt_dlp/postprocessor/embedthumbnail.py | 19 +- yt_dlp/postprocessor/exec.py | 5 +- yt_dlp/postprocessor/ffmpeg.py | 57 +- yt_dlp/postprocessor/modify_chapters.py | 2 +- .../postprocessor/movefilesafterdownload.py | 7 +- yt_dlp/postprocessor/sponskrub.py | 4 +- yt_dlp/postprocessor/sponsorblock.py | 10 +- yt_dlp/socks.py | 8 +- yt_dlp/update.py | 10 +- yt_dlp/utils/_legacy.py | 10 +- yt_dlp/utils/_utils.py | 199 ++-- yt_dlp/utils/networking.py | 4 +- 
yt_dlp/webvtt.py | 17 +- 915 files changed, 7028 insertions(+), 7247 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 837b600e3178..aeba3c44d125 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -266,7 +266,7 @@ After you have ensured this site is distributing its content legally, you can fo $ hatch fmt --check ``` - You can use `hatch fmt` to automatically fix problems. + You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). 1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: diff --git a/bundle/py2exe.py b/bundle/py2exe.py index 5fbe55e465e3..5b7f4883bcee 100755 --- a/bundle/py2exe.py +++ b/bundle/py2exe.py @@ -44,7 +44,7 @@ def main(): 'Cryptodome', # requests >=2.32.0 breaks py2exe builds due to certifi dependency 'requests', - 'urllib3' + 'urllib3', ], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], # Modules that are only imported dynamically must be added here diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py index db9dbfde515e..4184c4bc9f51 100755 --- a/bundle/pyinstaller.py +++ b/bundle/pyinstaller.py @@ -68,7 +68,7 @@ def exe(onedir): 'dist/', onedir and f'{name}/', name, - OS_NAME == 'win32' and '.exe' + OS_NAME == 'win32' and '.exe', ))) @@ -113,7 +113,7 @@ def windows_set_version(exe, version): ), kids=[ StringFileInfo([StringTable('040904B0', [ - StringStruct('Comments', 'yt-dlp%s Command Line Interface' % suffix), + 
StringStruct('Comments', f'yt-dlp{suffix} Command Line Interface'), StringStruct('CompanyName', 'https://github.com/yt-dlp'), StringStruct('FileDescription', 'yt-dlp%s' % (MACHINE and f' ({MACHINE})')), StringStruct('FileVersion', version), @@ -123,8 +123,8 @@ def windows_set_version(exe, version): StringStruct('ProductName', f'yt-dlp{suffix}'), StringStruct( 'ProductVersion', f'{version}{suffix} on Python {platform.python_version()}'), - ])]), VarFileInfo([VarStruct('Translation', [0, 1200])]) - ] + ])]), VarFileInfo([VarStruct('Translation', [0, 1200])]), + ], )) diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index 9b4a9d4e2f4b..3918ebde86de 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -9,8 +9,8 @@ import yt_dlp -BASH_COMPLETION_FILE = "completions/bash/yt-dlp" -BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in" +BASH_COMPLETION_FILE = 'completions/bash/yt-dlp' +BASH_COMPLETION_TEMPLATE = 'devscripts/bash-completion.in' def build_completion(opt_parser): @@ -21,9 +21,9 @@ def build_completion(opt_parser): opts_flag.append(option.get_opt_string()) with open(BASH_COMPLETION_TEMPLATE) as f: template = f.read() - with open(BASH_COMPLETION_FILE, "w") as f: + with open(BASH_COMPLETION_FILE, 'w') as f: # just using the special char - filled_template = template.replace("{{flags}}", " ".join(opts_flag)) + filled_template = template.replace('{{flags}}', ' '.join(opts_flag)) f.write(filled_template) diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index 8e199e7d0e8b..00634fb9116d 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -223,10 +223,10 @@ def format_single_change(self, info: CommitInfo): return message if not sep else f'{message}{sep}{rest}' - def _format_message_link(self, message, hash): - assert message or hash, 'Improperly defined commit message or override' - message = message if message else hash[:HASH_LENGTH] - return 
f'[{message}]({self.repo_url}/commit/{hash})' if hash else message + def _format_message_link(self, message, commit_hash): + assert message or commit_hash, 'Improperly defined commit message or override' + message = message if message else commit_hash[:HASH_LENGTH] + return f'[{message}]({self.repo_url}/commit/{commit_hash})' if commit_hash else message def _format_issues(self, issues): return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) @@ -356,7 +356,7 @@ def apply_overrides(self, overrides): logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') self._commits[commit.hash] = commit - self._commits = {key: value for key, value in reversed(self._commits.items())} + self._commits = dict(reversed(self._commits.items())) def groups(self): group_dict = defaultdict(list) diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 2270b31d3b97..cbb5859aa1a6 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -51,7 +51,7 @@ def apply_patch(text, patch): ), ( # Headings r'(?m)^ (\w.+\n)( (?=\w))?', - r'## \1' + r'## \1', ), ( # Fixup `--date` formatting rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$', @@ -61,26 +61,26 @@ def apply_patch(text, patch): ), ( # Do not split URLs rf'({delim[:-1]})? 
(?P<label>\[\S+\] )?(?P<url>https?({delim})?:({delim})?/({delim})?/(({delim})?\S+)+)\s', - lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n')) + lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n')), ), ( # Do not split "words" rf'(?m)({delim}\S+)+$', - lambda mobj: ''.join((delim, mobj.group(0).replace(delim, ''))) + lambda mobj: ''.join((delim, mobj.group(0).replace(delim, ''))), ), ( # Allow overshooting last line rf'(?m)^(?P<prev>.+)${delim}(?P<current>.+)$(?!{delim})', lambda mobj: (mobj.group().replace(delim, ' ') if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT - else mobj.group()) + else mobj.group()), ), ( # Avoid newline when a space is available b/w switch and description DISABLE_PATCH, # This creates issues with prepare_manpage r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim), - r'\1 ' + r'\1 ', ), ( # Replace brackets with a Markdown link r'SponsorBlock API \((http.+)\)', - r'[SponsorBlock API](\1)' + r'[SponsorBlock API](\1)', ), ) diff --git a/devscripts/set-variant.py b/devscripts/set-variant.py index 10341e7444ef..24ce4552d5e0 100644 --- a/devscripts/set-variant.py +++ b/devscripts/set-variant.py @@ -30,7 +30,7 @@ def property_setter(name, value): opts = parse_options() transform = compose_functions( property_setter('VARIANT', opts.variant), - property_setter('UPDATE_HINT', opts.update_message) + property_setter('UPDATE_HINT', opts.update_message), ) write_file(VERSION_FILE, transform(read_file(VERSION_FILE))) diff --git a/devscripts/update-version.py b/devscripts/update-version.py index 07a071745866..2018ba8440c9 100644 --- a/devscripts/update-version.py +++ b/devscripts/update-version.py @@ -24,7 +24,7 @@ def get_new_version(version, revision): else: old_version = read_version().split('.') if version.split('.') == old_version[:3]: - revision = str(int((old_version + [0])[3]) + 1) + revision = str(int(([*old_version, 
0])[3]) + 1) return f'{version}.{revision}' if revision else version diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 267af5f6ec7f..8e190c00cbcd 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -9,15 +9,15 @@ import yt_dlp -ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp" -ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in" +ZSH_COMPLETION_FILE = 'completions/zsh/_yt-dlp' +ZSH_COMPLETION_TEMPLATE = 'devscripts/zsh-completion.in' def build_completion(opt_parser): opts = [opt for group in opt_parser.option_groups for opt in group.option_list] - opts_file = [opt for opt in opts if opt.metavar == "FILE"] - opts_dir = [opt for opt in opts if opt.metavar == "DIR"] + opts_file = [opt for opt in opts if opt.metavar == 'FILE'] + opts_dir = [opt for opt in opts if opt.metavar == 'DIR'] fileopts = [] for opt in opts_file: @@ -38,11 +38,11 @@ def build_completion(opt_parser): with open(ZSH_COMPLETION_TEMPLATE) as f: template = f.read() - template = template.replace("{{fileopts}}", "|".join(fileopts)) - template = template.replace("{{diropts}}", "|".join(diropts)) - template = template.replace("{{flags}}", " ".join(flags)) + template = template.replace('{{fileopts}}', '|'.join(fileopts)) + template = template.replace('{{diropts}}', '|'.join(diropts)) + template = template.replace('{{flags}}', ' '.join(flags)) - with open(ZSH_COMPLETION_FILE, "w") as f: + with open(ZSH_COMPLETION_FILE, 'w') as f: f.write(template) diff --git a/pyproject.toml b/pyproject.toml index da6403ec7cc6..01162b794c2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,21 +183,84 @@ line-length = 120 [tool.ruff.lint] ignore = [ - "E402", # module level import not at top of file - "E501", # line too long - "E731", # do not assign a lambda expression, use a def - "E741", # ambiguous variable name + "E402", # module-import-not-at-top-of-file + "E501", # line-too-long + "E731", # lambda-assignment + "E741", # ambiguous-variable-name + 
"UP036", # outdated-version-block + "B006", # mutable-argument-default + "B008", # function-call-in-default-argument + "B011", # assert-false + "B017", # assert-raises-exception + "B023", # function-uses-loop-variable (false positives) + "B028", # no-explicit-stacklevel + "B904", # raise-without-from-inside-except + "C401", # unnecessary-generator-set + "C402", # unnecessary-generator-dict + "PIE790", # unnecessary-placeholder + "SIM102", # collapsible-if + "SIM108", # if-else-block-instead-of-if-exp + "SIM112", # uncapitalized-environment-variables + "SIM113", # enumerate-for-loop + "SIM114", # if-with-same-arms + "SIM115", # open-file-with-context-handler + "SIM117", # multiple-with-statements + "SIM223", # expr-and-false + "SIM300", # yoda-conditions + "TD001", # invalid-todo-tag + "TD002", # missing-todo-author + "TD003", # missing-todo-link + "PLE0604", # invalid-all-object (false positives) + "PLW0603", # global-statement + "PLW1510", # subprocess-run-without-check + "PLW2901", # redefined-loop-name + "RUF001", # ambiguous-unicode-character-string + "RUF012", # mutable-class-default + "RUF100", # unused-noqa (flake8 has slightly different behavior) ] select = [ - "E", # pycodestyle errors - "W", # pycodestyle warnings - "F", # pyflakes - "I", # import order + "E", # pycodestyle Error + "W", # pycodestyle Warning + "F", # Pyflakes + "I", # isort + "Q", # flake8-quotes + "N803", # invalid-argument-name + "N804", # invalid-first-argument-name-for-class-method + "UP", # pyupgrade + "B", # flake8-bugbear + "A", # flake8-builtins + "COM", # flake8-commas + "C4", # flake8-comprehensions + "FA", # flake8-future-annotations + "ISC", # flake8-implicit-str-concat + "ICN003", # banned-import-from + "PIE", # flake8-pie + "T20", # flake8-print + "RSE", # flake8-raise + "RET504", # unnecessary-assign + "SIM", # flake8-simplify + "TID251", # banned-api + "TD", # flake8-todos + "PLC", # Pylint Convention + "PLE", # Pylint Error + "PLW", # Pylint Warning + "RUF", # 
Ruff-specific rules ] [tool.ruff.lint.per-file-ignores] -"devscripts/lazy_load_template.py" = ["F401"] -"!yt_dlp/extractor/**.py" = ["I"] +"devscripts/lazy_load_template.py" = [ + "F401", # unused-import +] +"!yt_dlp/extractor/**.py" = [ + "I", # isort + "ICN003", # banned-import-from + "T20", # flake8-print + "A002", # builtin-argument-shadowing + "C408", # unnecessary-collection-call +] +"yt_dlp/jsinterp.py" = [ + "UP031", # printf-string-formatting +] [tool.ruff.lint.isort] known-first-party = [ @@ -207,6 +270,50 @@ known-first-party = [ ] relative-imports-order = "closest-to-furthest" +[tool.ruff.lint.flake8-quotes] +docstring-quotes = "double" +multiline-quotes = "single" +inline-quotes = "single" +avoid-escape = false + +[tool.ruff.lint.pep8-naming] +classmethod-decorators = [ + "yt_dlp.utils.classproperty", +] + +[tool.ruff.lint.flake8-import-conventions] +banned-from = [ + "base64", + "datetime", + "functools", + "glob", + "hashlib", + "itertools", + "json", + "math", + "os", + "pathlib", + "random", + "re", + "string", + "sys", + "time", + "urllib", + "uuid", + "xml", +] + +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"yt_dlp.compat.compat_str".msg = "Use `str` instead." +"yt_dlp.compat.compat_b64decode".msg = "Use `base64.b64decode` instead." +"yt_dlp.compat.compat_urlparse".msg = "Use `urllib.parse` instead." +"yt_dlp.compat.compat_parse_qs".msg = "Use `urllib.parse.parse_qs` instead." +"yt_dlp.compat.compat_urllib_parse_unquote".msg = "Use `urllib.parse.unquote` instead." +"yt_dlp.compat.compat_urllib_parse_urlencode".msg = "Use `urllib.parse.urlencode` instead." +"yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead." +"yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead." +"yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead." 
+ [tool.autopep8] max_line_length = 120 recursive = true diff --git a/test/conftest.py b/test/conftest.py index decd2c85c885..a8b92f811eb8 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -22,8 +22,8 @@ def handler(request): class HandlerWrapper(handler): RH_KEY = handler.RH_KEY - def __init__(self, *args, **kwargs): - super().__init__(logger=FakeLogger, *args, **kwargs) + def __init__(self, **kwargs): + super().__init__(logger=FakeLogger, **kwargs) return HandlerWrapper @@ -54,11 +54,11 @@ def skip_handlers_if(request, handler): def pytest_configure(config): config.addinivalue_line( - "markers", "skip_handler(handler): skip test for the given handler", + 'markers', 'skip_handler(handler): skip test for the given handler', ) config.addinivalue_line( - "markers", "skip_handler_if(handler): skip test for the given handler if condition is true" + 'markers', 'skip_handler_if(handler): skip test for the given handler if condition is true', ) config.addinivalue_line( - "markers", "skip_handlers_if(handler): skip test for handlers when the condition is true" + 'markers', 'skip_handlers_if(handler): skip test for handlers when the condition is true', ) diff --git a/test/helper.py b/test/helper.py index e7473120d1b2..3b550d192719 100644 --- a/test/helper.py +++ b/test/helper.py @@ -16,8 +16,8 @@ import pytest is_download_test = pytest.mark.download else: - def is_download_test(testClass): - return testClass + def is_download_test(test_class): + return test_class def get_params(override=None): @@ -45,10 +45,10 @@ def try_rm(filename): def report_warning(message, *args, **kwargs): - ''' + """ Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored - ''' + """ if sys.stderr.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: @@ -138,15 +138,14 @@ def expect_value(self, got, expected, field): elif isinstance(expected, list) and isinstance(got, list): self.assertEqual( 
len(expected), len(got), - 'Expect a list of length %d, but got a list of length %d for field %s' % ( - len(expected), len(got), field)) + f'Expect a list of length {len(expected)}, but got a list of length {len(got)} for field {field}') for index, (item_got, item_expected) in enumerate(zip(got, expected)): type_got = type(item_got) type_expected = type(item_expected) self.assertEqual( type_expected, type_got, - 'Type mismatch for list item at index %d for field %s, expected %r, got %r' % ( - index, field, type_expected, type_got)) + f'Type mismatch for list item at index {index} for field {field}, ' + f'expected {type_expected!r}, got {type_got!r}') expect_value(self, item_got, item_expected, field) else: if isinstance(expected, str) and expected.startswith('md5:'): @@ -224,7 +223,7 @@ def sanitize(key, value): test_info_dict.pop('display_id') # Remove deprecated fields - for old in YoutubeDL._deprecated_multivalue_fields.keys(): + for old in YoutubeDL._deprecated_multivalue_fields: test_info_dict.pop(old, None) # release_year may be generated from release_date @@ -246,11 +245,11 @@ def expect_info_dict(self, got_dict, expected_dict): if expected_dict.get('ext'): mandatory_fields.extend(('url', 'ext')) for key in mandatory_fields: - self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) + self.assertTrue(got_dict.get(key), f'Missing mandatory field {key}') # Check for mandatory fields that are automatically set by YoutubeDL if got_dict.get('_type', 'video') == 'video': for key in ['webpage_url', 'extractor', 'extractor_key']: - self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) + self.assertTrue(got_dict.get(key), f'Missing field: {key}') test_info_dict = sanitize_got_info_dict(got_dict) @@ -258,7 +257,7 @@ def expect_info_dict(self, got_dict, expected_dict): if missing_keys: def _repr(v): if isinstance(v, str): - return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n') + return "'{}'".format(v.replace('\\', 
'\\\\').replace("'", "\\'").replace('\n', '\\n')) elif isinstance(v, type): return v.__name__ else: @@ -275,8 +274,7 @@ def _repr(v): write_string(info_dict_str.replace('\n', '\n '), out=sys.stderr) self.assertFalse( missing_keys, - 'Missing keys in test definition: %s' % ( - ', '.join(sorted(missing_keys)))) + 'Missing keys in test definition: {}'.format(', '.join(sorted(missing_keys)))) def assertRegexpMatches(self, text, regexp, msg=None): @@ -285,9 +283,9 @@ def assertRegexpMatches(self, text, regexp, msg=None): else: m = re.match(regexp, text) if not m: - note = 'Regexp didn\'t match: %r not found' % (regexp) + note = f'Regexp didn\'t match: {regexp!r} not found' if len(text) < 1000: - note += ' in %r' % text + note += f' in {text!r}' if msg is None: msg = note else: @@ -310,7 +308,7 @@ def assertLessEqual(self, got, expected, msg=None): def assertEqual(self, got, expected, msg=None): - if not (got == expected): + if got != expected: if msg is None: msg = f'{got!r} not equal to {expected!r}' self.assertTrue(got == expected, msg) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 744587e45b0d..31e8f82448d4 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -262,19 +262,19 @@ def test_search_json_ld_realworld(self): ''', { 'chapters': [ - {"title": "Explosie Turnhout", "start_time": 70, "end_time": 440}, - {"title": "Jaarwisseling", "start_time": 440, "end_time": 1179}, - {"title": "Natuurbranden Colorado", "start_time": 1179, "end_time": 1263}, - {"title": "Klimaatverandering", "start_time": 1263, "end_time": 1367}, - {"title": "Zacht weer", "start_time": 1367, "end_time": 1383}, - {"title": "Financiële balans", "start_time": 1383, "end_time": 1484}, - {"title": "Club Brugge", "start_time": 1484, "end_time": 1575}, - {"title": "Mentale gezondheid bij topsporters", "start_time": 1575, "end_time": 1728}, - {"title": "Olympische Winterspelen", "start_time": 1728, "end_time": 1873}, - {"title": "Sober oudjaar in 
Nederland", "start_time": 1873, "end_time": 2079.23} + {'title': 'Explosie Turnhout', 'start_time': 70, 'end_time': 440}, + {'title': 'Jaarwisseling', 'start_time': 440, 'end_time': 1179}, + {'title': 'Natuurbranden Colorado', 'start_time': 1179, 'end_time': 1263}, + {'title': 'Klimaatverandering', 'start_time': 1263, 'end_time': 1367}, + {'title': 'Zacht weer', 'start_time': 1367, 'end_time': 1383}, + {'title': 'Financiële balans', 'start_time': 1383, 'end_time': 1484}, + {'title': 'Club Brugge', 'start_time': 1484, 'end_time': 1575}, + {'title': 'Mentale gezondheid bij topsporters', 'start_time': 1575, 'end_time': 1728}, + {'title': 'Olympische Winterspelen', 'start_time': 1728, 'end_time': 1873}, + {'title': 'Sober oudjaar in Nederland', 'start_time': 1873, 'end_time': 2079.23}, ], - 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)' - }, {} + 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)', + }, {}, ), ( # test multiple thumbnails in a list @@ -301,13 +301,13 @@ def test_search_json_ld_realworld(self): 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], }, {}, - ) + ), ] for html, expected_dict, search_json_ld_kwargs in _TESTS: expect_dict( self, self.ie._search_json_ld(html, None, **search_json_ld_kwargs), - expected_dict + expected_dict, ) def test_download_json(self): @@ -366,7 +366,7 @@ def test_parse_html5_media_entries(self): 'height': 740, 'tbr': 1500, }], - 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg' + 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg', }) # from https://www.csfd.cz/ @@ -419,9 +419,9 @@ def test_parse_html5_media_entries(self): 'height': 1080, }], 'subtitles': { - 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}] + 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}], }, - 'thumbnail': 
'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360' + 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360', }) # from https://tamasha.com/v/Kkdjw @@ -452,7 +452,7 @@ def test_parse_html5_media_entries(self): 'ext': 'mp4', 'format_id': '144p', 'height': 144, - }] + }], }) # from https://www.directvnow.com @@ -470,7 +470,7 @@ def test_parse_html5_media_entries(self): 'formats': [{ 'ext': 'mp4', 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', - }] + }], }) # from https://www.directvnow.com @@ -488,7 +488,7 @@ def test_parse_html5_media_entries(self): 'formats': [{ 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', 'ext': 'mp4', - }] + }], }) # from https://www.klarna.com/uk/ @@ -547,8 +547,8 @@ def test_extract_jwplayer_data_realworld(self): 'id': 'XEgvuql4', 'formats': [{ 'url': 'rtmp://192.138.214.154/live/sjclive', - 'ext': 'flv' - }] + 'ext': 'flv', + }], }) # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/ @@ -588,8 +588,8 @@ def test_extract_jwplayer_data_realworld(self): 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg', 'formats': [{ 'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv', - 'ext': 'flv' - }] + 'ext': 'flv', + }], }) # from http://www.indiedb.com/games/king-machine/videos @@ -610,12 +610,12 @@ def test_extract_jwplayer_data_realworld(self): 'formats': [{ 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4', 'height': 360, - 'ext': 'mp4' + 'ext': 'mp4', }, { 'url': 
'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4', 'height': 720, - 'ext': 'mp4' - }] + 'ext': 'mp4', + }], }) def test_parse_m3u8_formats(self): @@ -866,7 +866,7 @@ def test_parse_m3u8_formats(self): 'height': 1080, 'vcodec': 'avc1.64002a', }], - {} + {}, ), ( 'bipbop_16x9', @@ -990,45 +990,45 @@ def test_parse_m3u8_formats(self): 'en': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng_forced/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }], 'fr': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra_forced/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }], 'es': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa_forced/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }], 'ja': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn_forced/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }], - } + }, ), ] for m3u8_file, m3u8_url, 
expected_formats, expected_subs in _TEST_CASES: - with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f: + with open(f'./test/testdata/m3u8/{m3u8_file}.m3u8', encoding='utf-8') as f: formats, subs = self.ie._parse_m3u8_formats_and_subtitles( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) @@ -1366,14 +1366,14 @@ def test_parse_mpd_formats(self): 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', - } - ] + }, + ], }, - ) + ), ] for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: - with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f: + with open(f'./test/testdata/mpd/{mpd_file}.mpd', encoding='utf-8') as f: formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( compat_etree_fromstring(f.read().encode()), mpd_base_url=mpd_base_url, mpd_url=mpd_url) @@ -1408,7 +1408,7 @@ def test_parse_ism_formats(self): 'sampling_rate': 48000, 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-100', @@ -1431,7 +1431,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-326', @@ -1454,7 +1454,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-698', @@ -1477,7 +1477,7 @@ def test_parse_ism_formats(self): 'codec_private_data': 
'00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-1493', @@ -1500,7 +1500,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-4482', @@ -1523,7 +1523,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }], { @@ -1538,10 +1538,10 @@ def test_parse_ism_formats(self): 'duration': 8880746666, 'timescale': 10000000, 'fourcc': 'TTML', - 'codec_private_data': '' - } - } - ] + 'codec_private_data': '', + }, + }, + ], }, ), ( @@ -1571,7 +1571,7 @@ def test_parse_ism_formats(self): 'sampling_rate': 48000, 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'audio_deu_1-224', @@ -1597,7 +1597,7 @@ def test_parse_ism_formats(self): 'sampling_rate': 48000, 'channels': 6, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-23', @@ -1622,7 +1622,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-403', @@ -1647,7 +1647,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, 
}, { 'format_id': 'video_deu-680', @@ -1672,7 +1672,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-1253', @@ -1698,7 +1698,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-2121', @@ -1723,7 +1723,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-3275', @@ -1748,7 +1748,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-5300', @@ -1773,7 +1773,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-8079', @@ -1798,7 +1798,7 @@ def test_parse_ism_formats(self): 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }], {}, @@ -1806,7 +1806,7 @@ def test_parse_ism_formats(self): ] for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES: - with open('./test/testdata/ism/%s.Manifest' % 
ism_file, encoding='utf-8') as f: + with open(f'./test/testdata/ism/{ism_file}.Manifest', encoding='utf-8') as f: formats, subtitles = self.ie._parse_ism_formats_and_subtitles( compat_etree_fromstring(f.read().encode()), ism_url=ism_url) self.ie._sort_formats(formats) @@ -1827,12 +1827,12 @@ def test_parse_f4m_formats(self): 'tbr': 2148, 'width': 1280, 'height': 720, - }] + }], ), ] for f4m_file, f4m_url, expected_formats in _TEST_CASES: - with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f: + with open(f'./test/testdata/f4m/{f4m_file}.f4m', encoding='utf-8') as f: formats = self.ie._parse_f4m_formats( compat_etree_fromstring(f.read().encode()), f4m_url, None) @@ -1873,13 +1873,13 @@ def test_parse_xspf(self): }, { 'manifest_url': 'https://example.org/src/foo_xspf.xspf', 'url': 'https://example.com/track3.mp3', - }] - }] + }], + }], ), ] for xspf_file, xspf_url, expected_entries in _TEST_CASES: - with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f: + with open(f'./test/testdata/xspf/{xspf_file}.xspf', encoding='utf-8') as f: entries = self.ie._parse_xspf( compat_etree_fromstring(f.read().encode()), xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) @@ -1902,7 +1902,7 @@ def test_response_with_expected_status_returns_content(self): server_thread.start() (content, urlh) = self.ie._download_webpage_handle( - 'http://127.0.0.1:%d/teapot' % port, None, + f'http://127.0.0.1:{port}/teapot', None, expected_status=TEAPOT_RESPONSE_STATUS) self.assertEqual(content, TEAPOT_RESPONSE_BODY) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 5242cf88f92c..841ce1af3e75 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -8,6 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import contextlib import copy import json @@ -129,8 +130,8 @@ def test(inp, *expected, multi=False): 'allow_multiple_audio_streams': multi, }) ydl.process_ie_result(info_dict.copy()) - 
downloaded = map(lambda x: x['format_id'], ydl.downloaded_info_dicts) - self.assertEqual(list(downloaded), list(expected)) + downloaded = [x['format_id'] for x in ydl.downloaded_info_dicts] + self.assertEqual(downloaded, list(expected)) test('20/47', '47') test('20/71/worst', '35') @@ -515,10 +516,8 @@ def test_format_filtering(self): self.assertEqual(downloaded_ids, ['D', 'C', 'B']) ydl = YDL({'format': 'best[height<40]'}) - try: + with contextlib.suppress(ExtractorError): ydl.process_ie_result(info_dict) - except ExtractorError: - pass self.assertEqual(ydl.downloaded_info_dicts, []) def test_default_format_spec(self): @@ -652,8 +651,8 @@ def test_add_extra_info(self): 'formats': [ {'id': 'id 1', 'height': 1080, 'width': 1920}, {'id': 'id 2', 'height': 720}, - {'id': 'id 3'} - ] + {'id': 'id 3'}, + ], } def test_prepare_outtmpl_and_filename(self): @@ -773,7 +772,7 @@ def expect_same_infodict(out): test('%(formats)j', (json.dumps(FORMATS), None)) test('%(formats)#j', ( json.dumps(FORMATS, indent=4), - json.dumps(FORMATS, indent=4).replace(':', ':').replace('"', """).replace('\n', ' ') + json.dumps(FORMATS, indent=4).replace(':', ':').replace('"', '"').replace('\n', ' '), )) test('%(title5).3B', 'á') test('%(title5)U', 'áéí 𝐀') @@ -843,8 +842,8 @@ def gen(): # Empty filename test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4') - # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme - # test('%(foo|)s', ('', '_')) # fixme + # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # FIXME: ? + # test('%(foo|)s', ('', '_')) # FIXME: ? 
# Environment variable expansion for prepare_filename os.environ['__yt_dlp_var'] = 'expanded' @@ -861,7 +860,7 @@ def gen(): test('Hello %(title1)s', 'Hello $PATH') test('Hello %(title2)s', 'Hello %PATH%') test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test')) - test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo⧸bar⧹test' % os.path.sep)) + test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foo⧸bar⧹test')) def test_format_note(self): ydl = YoutubeDL() @@ -883,22 +882,22 @@ def run(self, info): f.write('EXAMPLE') return [info['filepath']], info - def run_pp(params, PP): + def run_pp(params, pp): with open(filename, 'w') as f: f.write('EXAMPLE') ydl = YoutubeDL(params) - ydl.add_post_processor(PP()) + ydl.add_post_processor(pp()) ydl.post_process(filename, {'filepath': filename}) run_pp({'keepvideo': True}, SimplePP) - self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) - self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + self.assertTrue(os.path.exists(filename), f'{filename} doesn\'t exist') + self.assertTrue(os.path.exists(audiofile), f'{audiofile} doesn\'t exist') os.unlink(filename) os.unlink(audiofile) run_pp({'keepvideo': False}, SimplePP) - self.assertFalse(os.path.exists(filename), '%s exists' % filename) - self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + self.assertFalse(os.path.exists(filename), f'{filename} exists') + self.assertTrue(os.path.exists(audiofile), f'{audiofile} doesn\'t exist') os.unlink(audiofile) class ModifierPP(PostProcessor): @@ -908,7 +907,7 @@ def run(self, info): return [], info run_pp({'keepvideo': False}, ModifierPP) - self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) + self.assertTrue(os.path.exists(filename), f'{filename} doesn\'t exist') os.unlink(filename) def test_match_filter(self): @@ -920,7 +919,7 @@ def test_match_filter(self): 'duration': 30, 'filesize': 10 * 1024, 'playlist_id': '42', - 
'uploader': "變態妍字幕版 太妍 тест", + 'uploader': '變態妍字幕版 太妍 тест', 'creator': "тест ' 123 ' тест--", 'webpage_url': 'http://example.com/watch?v=shenanigans', } @@ -933,7 +932,7 @@ def test_match_filter(self): 'description': 'foo', 'filesize': 5 * 1024, 'playlist_id': '43', - 'uploader': "тест 123", + 'uploader': 'тест 123', 'webpage_url': 'http://example.com/watch?v=SHENANIGANS', } videos = [first, second] @@ -1180,7 +1179,7 @@ def _real_extract(self, url): }) return { 'id': video_id, - 'title': 'Video %s' % video_id, + 'title': f'Video {video_id}', 'formats': formats, } @@ -1194,8 +1193,8 @@ def _entries(self): '_type': 'url_transparent', 'ie_key': VideoIE.ie_key(), 'id': video_id, - 'url': 'video:%s' % video_id, - 'title': 'Video Transparent %s' % video_id, + 'url': f'video:{video_id}', + 'title': f'Video Transparent {video_id}', } def _real_extract(self, url): diff --git a/test/test_aes.py b/test/test_aes.py index a26abfd7d0e4..5f975efecfa4 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -87,7 +87,7 @@ def test_decrypt_text(self): password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) - + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' + + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) @@ -95,7 +95,7 @@ def test_decrypt_text(self): password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) - + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' + + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) @@ -132,16 +132,16 @@ def test_pad_block(self): block = [0x21, 0xA0, 0x43, 0xFF] self.assertEqual(pad_block(block, 'pkcs7'), - 
block + [0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C]) + [*block, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C]) self.assertEqual(pad_block(block, 'iso7816'), - block + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) + [*block, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) self.assertEqual(pad_block(block, 'whitespace'), - block + [0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20]) + [*block, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20]) self.assertEqual(pad_block(block, 'zero'), - block + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) + [*block, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) block = list(range(16)) for mode in ('pkcs7', 'iso7816', 'whitespace', 'zero'): diff --git a/test/test_compat.py b/test/test_compat.py index 71ca7f99f10c..e7d97e3e93e7 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -15,8 +15,8 @@ from yt_dlp.compat import ( compat_etree_fromstring, compat_expanduser, - compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, + compat_urllib_parse_unquote, # noqa: TID251 + compat_urllib_parse_urlencode, # noqa: TID251 ) from yt_dlp.compat.urllib.request import getproxies @@ -24,15 +24,15 @@ class TestCompat(unittest.TestCase): def test_compat_passthrough(self): with self.assertWarns(DeprecationWarning): - compat.compat_basestring + _ = compat.compat_basestring with self.assertWarns(DeprecationWarning): - compat.WINDOWS_VT_MODE + _ = compat.WINDOWS_VT_MODE self.assertEqual(urllib.request.getproxies, getproxies) with self.assertWarns(DeprecationWarning): - compat.compat_pycrypto_AES # Must not raise error + _ = compat.compat_pycrypto_AES # Must not raise error def test_compat_expanduser(self): old_home = os.environ.get('HOME') diff --git a/test/test_config.py b/test/test_config.py index a393b653483e..238ca66d08a0 100644 
--- a/test/test_config.py +++ b/test/test_config.py @@ -71,7 +71,7 @@ def _generate_expected_groups(): Path('/etc/yt-dlp.conf'), Path('/etc/yt-dlp/config'), Path('/etc/yt-dlp/config.txt'), - ] + ], } diff --git a/test/test_cookies.py b/test/test_cookies.py index bd61f30a660d..a682fee1d382 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -106,7 +106,7 @@ def test_chrome_cookie_decryptor_linux_v11(self): def test_chrome_cookie_decryptor_windows_v10(self): with MonkeyPatch(cookies, { - '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&' + '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&', }): encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad=' value = '32101439' @@ -121,17 +121,17 @@ def test_chrome_cookie_decryptor_mac_v10(self): self.assertEqual(decryptor.decrypt(encrypted_value), value) def test_safari_cookie_parsing(self): - cookies = \ - b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' \ - b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' \ - b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' \ - b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \ - b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \ - b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00(' + cookies = ( + b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' + b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' + b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' + b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' + b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00(') jar = parse_safari_cookies(cookies) self.assertEqual(len(jar), 1) - cookie = list(jar)[0] + cookie = next(iter(jar)) self.assertEqual(cookie.domain, 'localhost') self.assertEqual(cookie.port, None) self.assertEqual(cookie.path, '/') @@ -164,7 +164,7 @@ def _run_tests(self, *cases): attributes = { key: value for key, value in dict(morsel).items() - if value != "" + if value != '' } self.assertEqual(attributes, expected_attributes, message) @@ -174,133 +174,133 @@ def test_parsing(self): self._run_tests( # Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py ( - "Test basic cookie", - "chips=ahoy; vienna=finger", - {"chips": "ahoy", "vienna": "finger"}, + 'Test basic cookie', + 'chips=ahoy; vienna=finger', + {'chips': 'ahoy', 'vienna': 'finger'}, ), ( - "Test quoted cookie", + 'Test quoted cookie', 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', - {"keebler": 'E=mc2; L="Loves"; fudge=\012;'}, + {'keebler': 'E=mc2; L="Loves"; fudge=\012;'}, ), ( "Allow '=' in an unquoted value", - "keebler=E=mc2", - {"keebler": "E=mc2"}, + 'keebler=E=mc2', + {'keebler': 'E=mc2'}, ), ( "Allow cookies with ':' in their name", - "key:term=value:term", - {"key:term": "value:term"}, + 'key:term=value:term', + {'key:term': 'value:term'}, ), ( "Allow '[' and ']' in cookie values", - "a=b; c=[; d=r; f=h", - {"a": "b", "c": "[", "d": "r", "f": "h"}, + 'a=b; c=[; d=r; f=h', + {'a': 'b', 'c': '[', 'd': 'r', 'f': 'h'}, ), ( - "Test basic cookie attributes", + 'Test basic cookie attributes', 'Customer="WILE_E_COYOTE"; Version=1; Path=/acme', - {"Customer": 
("WILE_E_COYOTE", {"version": "1", "path": "/acme"})}, + {'Customer': ('WILE_E_COYOTE', {'version': '1', 'path': '/acme'})}, ), ( - "Test flag only cookie attributes", + 'Test flag only cookie attributes', 'Customer="WILE_E_COYOTE"; HttpOnly; Secure', - {"Customer": ("WILE_E_COYOTE", {"httponly": True, "secure": True})}, + {'Customer': ('WILE_E_COYOTE', {'httponly': True, 'secure': True})}, ), ( - "Test flag only attribute with values", - "eggs=scrambled; httponly=foo; secure=bar; Path=/bacon", - {"eggs": ("scrambled", {"httponly": "foo", "secure": "bar", "path": "/bacon"})}, + 'Test flag only attribute with values', + 'eggs=scrambled; httponly=foo; secure=bar; Path=/bacon', + {'eggs': ('scrambled', {'httponly': 'foo', 'secure': 'bar', 'path': '/bacon'})}, ), ( "Test special case for 'expires' attribute, 4 digit year", 'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT', - {"Customer": ("W", {"expires": "Wed, 01 Jan 2010 00:00:00 GMT"})}, + {'Customer': ('W', {'expires': 'Wed, 01 Jan 2010 00:00:00 GMT'})}, ), ( "Test special case for 'expires' attribute, 2 digit year", 'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT', - {"Customer": ("W", {"expires": "Wed, 01 Jan 98 00:00:00 GMT"})}, + {'Customer': ('W', {'expires': 'Wed, 01 Jan 98 00:00:00 GMT'})}, ), ( - "Test extra spaces in keys and values", - "eggs = scrambled ; secure ; path = bar ; foo=foo ", - {"eggs": ("scrambled", {"secure": True, "path": "bar"}), "foo": "foo"}, + 'Test extra spaces in keys and values', + 'eggs = scrambled ; secure ; path = bar ; foo=foo ', + {'eggs': ('scrambled', {'secure': True, 'path': 'bar'}), 'foo': 'foo'}, ), ( - "Test quoted attributes", + 'Test quoted attributes', 'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"', - {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})} + {'Customer': ('WILE_E_COYOTE', {'version': '1', 'path': '/acme'})}, ), # Our own tests that CPython passes ( "Allow ';' in quoted value", 'chips="a;hoy"; vienna=finger', - {"chips": "a;hoy", 
"vienna": "finger"}, + {'chips': 'a;hoy', 'vienna': 'finger'}, ), ( - "Keep only the last set value", - "a=c; a=b", - {"a": "b"}, + 'Keep only the last set value', + 'a=c; a=b', + {'a': 'b'}, ), ) def test_lenient_parsing(self): self._run_tests( ( - "Ignore and try to skip invalid cookies", + 'Ignore and try to skip invalid cookies', 'chips={"ahoy;": 1}; vienna="finger;"', - {"vienna": "finger;"}, + {'vienna': 'finger;'}, ), ( - "Ignore cookies without a name", - "a=b; unnamed; c=d", - {"a": "b", "c": "d"}, + 'Ignore cookies without a name', + 'a=b; unnamed; c=d', + {'a': 'b', 'c': 'd'}, ), ( "Ignore '\"' cookie without name", 'a=b; "; c=d', - {"a": "b", "c": "d"}, + {'a': 'b', 'c': 'd'}, ), ( - "Skip all space separated values", - "x a=b c=d x; e=f", - {"a": "b", "c": "d", "e": "f"}, + 'Skip all space separated values', + 'x a=b c=d x; e=f', + {'a': 'b', 'c': 'd', 'e': 'f'}, ), ( - "Skip all space separated values", + 'Skip all space separated values', 'x a=b; data={"complex": "json", "with": "key=value"}; x c=d x', - {"a": "b", "c": "d"}, + {'a': 'b', 'c': 'd'}, ), ( - "Expect quote mending", + 'Expect quote mending', 'a=b; invalid="; c=d', - {"a": "b", "c": "d"}, + {'a': 'b', 'c': 'd'}, ), ( - "Reset morsel after invalid to not capture attributes", - "a=b; invalid; Version=1; c=d", - {"a": "b", "c": "d"}, + 'Reset morsel after invalid to not capture attributes', + 'a=b; invalid; Version=1; c=d', + {'a': 'b', 'c': 'd'}, ), ( - "Reset morsel after invalid to not capture attributes", - "a=b; $invalid; $Version=1; c=d", - {"a": "b", "c": "d"}, + 'Reset morsel after invalid to not capture attributes', + 'a=b; $invalid; $Version=1; c=d', + {'a': 'b', 'c': 'd'}, ), ( - "Continue after non-flag attribute without value", - "a=b; path; Version=1; c=d", - {"a": "b", "c": "d"}, + 'Continue after non-flag attribute without value', + 'a=b; path; Version=1; c=d', + {'a': 'b', 'c': 'd'}, ), ( - "Allow cookie attributes with `$` prefix", + 'Allow cookie attributes with `$` 
prefix', 'Customer="WILE_E_COYOTE"; $Version=1; $Secure; $Path=/acme', - {"Customer": ("WILE_E_COYOTE", {"version": "1", "secure": True, "path": "/acme"})}, + {'Customer': ('WILE_E_COYOTE', {'version': '1', 'secure': True, 'path': '/acme'})}, ), ( - "Invalid Morsel keys should not result in an error", - "Key=Value; [Invalid]=Value; Another=Value", - {"Key": "Value", "Another": "Value"}, + 'Invalid Morsel keys should not result in an error', + 'Key=Value; [Invalid]=Value; Another=Value', + {'Key': 'Value', 'Another': 'Value'}, ), ) diff --git a/test/test_download.py b/test/test_download.py index 2530792493ae..882d5456500e 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -94,7 +94,7 @@ def test_template(self): 'playlist', [] if is_playlist else [test_case]) def print_skipping(reason): - print('Skipping %s: %s' % (test_case['name'], reason)) + print('Skipping {}: {}'.format(test_case['name'], reason)) self.skipTest(reason) if not ie.working(): @@ -117,7 +117,7 @@ def print_skipping(reason): for other_ie in other_ies: if not other_ie.working(): - print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key()) + print_skipping(f'test depends on {other_ie.ie_key()}IE, marked as not WORKING') params = get_params(test_case.get('params', {})) params['outtmpl'] = tname + '_' + params['outtmpl'] @@ -148,10 +148,7 @@ def match_exception(err): return False if err.__class__.__name__ == expected_exception: return True - for exc in err.exc_info: - if exc.__class__.__name__ == expected_exception: - return True - return False + return any(exc.__class__.__name__ == expected_exception for exc in err.exc_info) def try_rm_tcs_files(tcs=None): if tcs is None: @@ -181,7 +178,7 @@ def try_rm_tcs_files(tcs=None): raise if try_num == RETRIES: - report_warning('%s failed due to network errors, skipping...' 
% tname) + report_warning(f'{tname} failed due to network errors, skipping...') return print(f'Retrying: {try_num} failed tries\n\n##########\n\n') @@ -244,9 +241,8 @@ def try_rm_tcs_files(tcs=None): got_fsize = os.path.getsize(tc_filename) assertGreaterEqual( self, got_fsize, expected_minsize, - 'Expected %s to be at least %s, but it\'s only %s ' % - (tc_filename, format_bytes(expected_minsize), - format_bytes(got_fsize))) + f'Expected {tc_filename} to be at least {format_bytes(expected_minsize)}, ' + f'but it\'s only {format_bytes(got_fsize)} ') if 'md5' in tc: md5_for_file = _file_md5(tc_filename) self.assertEqual(tc['md5'], md5_for_file) @@ -255,7 +251,7 @@ def try_rm_tcs_files(tcs=None): info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' self.assertTrue( os.path.exists(info_json_fn), - 'Missing info file %s' % info_json_fn) + f'Missing info file {info_json_fn}') with open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) expect_info_dict(self, info_dict, tc.get('info_dict', {})) diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 099ec2fff49b..faba0bc9c8d6 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -38,9 +38,9 @@ def send_content_range(self, total=None): end = int(mobj.group(2)) valid_range = start is not None and end is not None if valid_range: - content_range = 'bytes %d-%d' % (start, end) + content_range = f'bytes {start}-{end}' if total: - content_range += '/%d' % total + content_range += f'/{total}' self.send_header('Content-Range', content_range) return (end - start + 1) if valid_range else total @@ -84,7 +84,7 @@ def download(self, params, ep): filename = 'testfile.mp4' try_rm(encodeFilename(filename)) self.assertTrue(downloader.real_download(filename, { - 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep), + 'url': f'http://127.0.0.1:{self.port}/{ep}', }), ep) self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) 
try_rm(encodeFilename(filename)) diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py index 1b21fe78e871..2435c878a51b 100644 --- a/test/test_http_proxy.py +++ b/test/test_http_proxy.py @@ -105,7 +105,7 @@ def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eo self.incoming, self.outgoing, server_hostname=server_hostname, - server_side=server_side + server_side=server_side, ) self._ssl_io_loop(self.sslobj.do_handshake) @@ -333,7 +333,7 @@ def test_http_connect_auth(self, handler, ctx): @pytest.mark.skip_handler( 'Requests', - 'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374' + 'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374', ) def test_http_connect_bad_auth(self, handler, ctx): with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address: diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py index 47c632a4e2e5..4e41007c8211 100644 --- a/test/test_iqiyi_sdk_interpreter.py +++ b/test/test_iqiyi_sdk_interpreter.py @@ -29,11 +29,11 @@ def error(self, msg): @is_download_test class TestIqiyiSDKInterpreter(unittest.TestCase): def test_iqiyi_sdk_interpreter(self): - ''' + """ Test the functionality of IqiyiSDKInterpreter by trying to log in If `sign` is incorrect, /validate call throws an HTTP 556 error - ''' + """ logger = WarningLogger() ie = IqiyiIE(FakeYDL({'logger': logger})) ie._perform_login('foo', 'bar') diff --git a/test/test_netrc.py b/test/test_netrc.py index dc708d974cf3..1e0f4ee3b27b 100644 --- a/test/test_netrc.py +++ b/test/test_netrc.py @@ -21,7 +21,7 @@ def test_netrc_present(self): continue self.assertTrue( ie._NETRC_MACHINE, - 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME) + f'Extractor {ie.IE_NAME} supports login, but is missing a _NETRC_MACHINE property') if __name__ == '__main__': diff --git a/test/test_networking.py 
b/test/test_networking.py index d127cbb94c46..af3ece3b44e4 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -375,10 +375,10 @@ def test_raise_http_error(self, handler): with handler() as rh: for bad_status in (400, 500, 599, 302): with pytest.raises(HTTPError): - validate_and_send(rh, Request('http://127.0.0.1:%d/gen_%d' % (self.http_port, bad_status))) + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}')) # Should not raise an error - validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close() + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close() def test_response_url(self, handler): with handler() as rh: @@ -472,7 +472,7 @@ def test_redirect_loop(self, handler): def test_incompleteread(self, handler): with handler(timeout=2) as rh: with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'): - validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read() + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read() def test_cookies(self, handler): cookiejar = YoutubeDLCookieJar() @@ -740,7 +740,7 @@ class TestRequestHandlerMisc: @pytest.mark.parametrize('handler,logger_name', [ ('Requests', 'urllib3'), ('Websockets', 'websockets.client'), - ('Websockets', 'websockets.server') + ('Websockets', 'websockets.server'), ], indirect=['handler']) def test_remove_logging_handler(self, handler, logger_name): # Ensure any logging handlers, which may contain a YoutubeDL instance, @@ -794,7 +794,7 @@ def test_verify_cert_error_text(self, handler): with handler() as rh: with pytest.raises( CertificateVerifyError, - match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate' + match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate', ): validate_and_send(rh, 
Request(f'https://127.0.0.1:{self.https_port}/headers')) @@ -804,14 +804,14 @@ def test_verify_cert_error_text(self, handler): ( Request('http://127.0.0.1', method='GET\n'), 'method can\'t contain control characters', - lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5) + lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5), ), # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265 # bpo-38576: Check implemented in 3.7.8+, 3.8.3+ ( Request('http://127.0.0. 1', method='GET'), 'URL can\'t contain control characters', - lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3) + lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3), ), # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50 (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None), @@ -840,7 +840,7 @@ class TestRequestsRequestHandler(TestRequestHandlerBase): (lambda: requests.exceptions.InvalidHeader(), RequestError), # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535 (lambda: urllib3.exceptions.HTTPError(), TransportError), - (lambda: requests.exceptions.RequestException(), RequestError) + (lambda: requests.exceptions.RequestException(), RequestError), # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object ]) def test_request_error_mapping(self, handler, monkeypatch, raised, expected): @@ -868,12 +868,12 @@ def request(self, *args, **kwargs): ( lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)), IncompleteRead, - '3 bytes read, 4 more expected' + '3 bytes read, 4 more expected', ), ( lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)), IncompleteRead, - '3 bytes read, 5 more expected' + '3 bytes read, 5 more expected', ), ]) def test_response_error_mapping(self, handler, monkeypatch, 
raised, expected, match): @@ -1125,7 +1125,7 @@ class HTTPSupportedRH(ValidationRH): ('https', False, {}), ]), (NoCheckRH, [('http', False, {})]), - (ValidationRH, [('http', UnsupportedRequest, {})]) + (ValidationRH, [('http', UnsupportedRequest, {})]), ] PROXY_SCHEME_TESTS = [ @@ -1219,7 +1219,7 @@ class HTTPSupportedRH(ValidationRH): ({'impersonate': ImpersonateTarget('chrome', None, None, None)}, False), ({'impersonate': ImpersonateTarget(None, None, None, None)}, False), ({'impersonate': ImpersonateTarget()}, False), - ({'impersonate': 'chrome'}, AssertionError) + ({'impersonate': 'chrome'}, AssertionError), ]), (NoCheckRH, 'http', [ ({'cookiejar': 'notacookiejar'}, False), @@ -1235,7 +1235,7 @@ class HTTPSupportedRH(ValidationRH): ('Urllib', False, 'http'), ('Requests', False, 'http'), ('CurlCFFI', False, 'http'), - ('Websockets', False, 'ws') + ('Websockets', False, 'ws'), ], indirect=['handler']) def test_no_proxy(self, handler, fail, scheme): run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'})) @@ -1246,7 +1246,7 @@ def test_no_proxy(self, handler, fail, scheme): (HTTPSupportedRH, 'http'), ('Requests', 'http'), ('CurlCFFI', 'http'), - ('Websockets', 'ws') + ('Websockets', 'ws'), ], indirect=['handler']) def test_empty_proxy(self, handler, scheme): run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None})) @@ -1258,7 +1258,7 @@ def test_empty_proxy(self, handler, scheme): (HTTPSupportedRH, 'http'), ('Requests', 'http'), ('CurlCFFI', 'http'), - ('Websockets', 'ws') + ('Websockets', 'ws'), ], indirect=['handler']) def test_invalid_proxy_url(self, handler, scheme, proxy_url): run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url})) @@ -1474,7 +1474,7 @@ def test_compat_opener(self): @pytest.mark.parametrize('proxy,expected', [ ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}), ('', {'all': '__noproxy__'}), - (None, {'http': 'http://127.0.0.1:8081', 
'https': 'http://127.0.0.1:8081'}) # env, set https + (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}), # env, set https ]) def test_proxy(self, proxy, expected, monkeypatch): monkeypatch.setenv('HTTP_PROXY', 'http://127.0.0.1:8081') @@ -1546,7 +1546,7 @@ def _send(self, request: Request): with FakeImpersonationRHYDL() as ydl: with pytest.raises( RequestError, - match=r'Impersonate target "test" is not available' + match=r'Impersonate target "test" is not available', ): ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)})) @@ -1558,7 +1558,7 @@ def _send(self, request: Request): pass _SUPPORTED_URL_SCHEMES = ('http',) - _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc',): 'test'} + _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'} _SUPPORTED_PROXY_SCHEMES = None super().__init__(*args, **kwargs) @@ -1567,14 +1567,14 @@ def _send(self, request: Request): with FakeHTTPRHYDL() as ydl: with pytest.raises( RequestError, - match=r'Impersonate target "test" is not available' + match=r'Impersonate target "test" is not available', ): ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)})) def test_raise_impersonate_error(self): with pytest.raises( YoutubeDLError, - match=r'Impersonate target "test" is not available' + match=r'Impersonate target "test" is not available', ): FakeYDL({'impersonate': ImpersonateTarget('test', None, None, None)}) @@ -1592,7 +1592,7 @@ def _send(self, request: Request): monkeypatch.setattr(FakeYDL, 'build_request_director', lambda cls, handlers, preferences=None: brh(cls, handlers=[IRH])) with FakeYDL({ - 'impersonate': ImpersonateTarget('abc', None, None, None) + 'impersonate': ImpersonateTarget('abc', None, None, None), }) as ydl: rh = self.build_handler(ydl, IRH) assert rh.impersonate == ImpersonateTarget('abc', None, None, None) @@ -1604,7 +1604,7 @@ class 
TestRH(ImpersonateRequestHandler): def _send(self, request: Request): pass _SUPPORTED_URL_SCHEMES = ('http',) - _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client,): 'test'} + _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client): 'test'} RH_KEY = target_client RH_NAME = target_client handlers.append(TestRH) @@ -1614,7 +1614,7 @@ def _send(self, request: Request): assert set(ydl._get_available_impersonate_targets()) == { (ImpersonateTarget('xyz'), 'xyz'), (ImpersonateTarget('abc'), 'abc'), - (ImpersonateTarget('asd'), 'asd') + (ImpersonateTarget('asd'), 'asd'), } assert ydl._impersonate_target_available(ImpersonateTarget('abc')) assert ydl._impersonate_target_available(ImpersonateTarget()) @@ -1837,7 +1837,7 @@ def test_copy(self): extensions={'cookiejar': CookieJar()}, headers={'Accept-Encoding': 'br'}, proxies={'http': 'http://127.0.0.1'}, - data=[b'123'] + data=[b'123'], ) req_copy = req.copy() assert req_copy is not req @@ -1863,7 +1863,7 @@ class AnotherRequest(Request): assert isinstance(req.copy(), AnotherRequest) def test_url(self): - req = Request(url='https://фtest.example.com/ some spaceв?ä=c',) + req = Request(url='https://фtest.example.com/ some spaceв?ä=c') assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c' assert Request(url='//example.com').url == 'http://example.com' @@ -1878,7 +1878,7 @@ class TestResponse: ('custom', 200, 'custom'), (None, 404, 'Not Found'), # fallback status ('', 403, 'Forbidden'), - (None, 999, None) + (None, 999, None), ]) def test_reason(self, reason, status, expected): res = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason) @@ -1933,7 +1933,7 @@ def test_target_from_str(self, target_str, expected): @pytest.mark.parametrize('target_str', [ '-120', ':-12.0', '-12:-12', '-:-', - '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:' + '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:', ]) def test_target_from_invalid_str(self, target_str): with 
pytest.raises(ValueError): @@ -1949,7 +1949,7 @@ def test_target_from_invalid_str(self, target_str): (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'), (ImpersonateTarget('abc', None, 'xyz'), 'abc:xyz'), (ImpersonateTarget(None, None, 'xyz', '6.5'), ':xyz-6.5'), - (ImpersonateTarget('abc', ), 'abc'), + (ImpersonateTarget('abc'), 'abc'), (ImpersonateTarget(None, None, None, None), ''), ]) def test_str(self, target, expected): diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py index b7b71430e791..204fe87bdab7 100644 --- a/test/test_networking_utils.py +++ b/test/test_networking_utils.py @@ -39,7 +39,7 @@ def test_select_proxy(self): proxies = { 'all': 'socks5://example.com', 'http': 'http://example.com:1080', - 'no': 'bypass.example.com,yt-dl.org' + 'no': 'bypass.example.com,yt-dl.org', } assert select_proxy('https://example.com', proxies) == proxies['all'] @@ -54,7 +54,7 @@ def test_select_proxy(self): 'port': 1080, 'rdns': True, 'username': None, - 'password': None + 'password': None, }), ('socks5://user:@example.com:5555', { 'proxytype': ProxyType.SOCKS5, @@ -62,7 +62,7 @@ def test_select_proxy(self): 'port': 5555, 'rdns': False, 'username': 'user', - 'password': '' + 'password': '', }), ('socks4://u%40ser:pa%20ss@127.0.0.1:1080', { 'proxytype': ProxyType.SOCKS4, @@ -70,7 +70,7 @@ def test_select_proxy(self): 'port': 1080, 'rdns': False, 'username': 'u@ser', - 'password': 'pa ss' + 'password': 'pa ss', }), ('socks4a://:pa%20ss@127.0.0.1', { 'proxytype': ProxyType.SOCKS4A, @@ -78,8 +78,8 @@ def test_select_proxy(self): 'port': 1080, 'rdns': True, 'username': '', - 'password': 'pa ss' - }) + 'password': 'pa ss', + }), ]) def test_make_socks_proxy_opts(self, socks_proxy, expected): assert make_socks_proxy_opts(socks_proxy) == expected diff --git a/test/test_overwrites.py b/test/test_overwrites.py index 6954c07f90f9..0beafdf12e15 100644 --- a/test/test_overwrites.py +++ b/test/test_overwrites.py @@ -27,7 +27,7 @@ def 
test_default_overwrites(self): [ sys.executable, 'yt_dlp/__main__.py', '-o', 'test.webm', - 'https://www.youtube.com/watch?v=jNQXAC9IVRw' + 'https://www.youtube.com/watch?v=jNQXAC9IVRw', ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = outp.communicate() self.assertTrue(b'has already been downloaded' in sout) @@ -39,7 +39,7 @@ def test_yes_overwrites(self): [ sys.executable, 'yt_dlp/__main__.py', '--yes-overwrites', '-o', 'test.webm', - 'https://www.youtube.com/watch?v=jNQXAC9IVRw' + 'https://www.youtube.com/watch?v=jNQXAC9IVRw', ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = outp.communicate() self.assertTrue(b'has already been downloaded' not in sout) diff --git a/test/test_plugins.py b/test/test_plugins.py index 6cde579e1e9c..c82158e9fc35 100644 --- a/test/test_plugins.py +++ b/test/test_plugins.py @@ -31,7 +31,7 @@ def test_extractor_classes(self): # don't load modules with underscore prefix self.assertFalse( - f'{PACKAGE_NAME}.extractor._ignore' in sys.modules.keys(), + f'{PACKAGE_NAME}.extractor._ignore' in sys.modules, 'loaded module beginning with underscore') self.assertNotIn('IgnorePluginIE', plugins_ie.keys()) diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py index 3778d1794258..6500dd3863de 100644 --- a/test/test_post_hooks.py +++ b/test/test_post_hooks.py @@ -59,7 +59,7 @@ def hook_two(self, filename): def hook_three(self, filename): self.files.append(filename) - raise Exception('Test exception for \'%s\'' % filename) + raise Exception(f'Test exception for \'{filename}\'') def tearDown(self): for f in self.files: diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index 52e5587729b6..603f85c654c6 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -9,7 +9,7 @@ from yt_dlp import YoutubeDL -from yt_dlp.compat import compat_shlex_quote +from yt_dlp.utils import shell_quote from yt_dlp.postprocessor import ( ExecPP, 
FFmpegThumbnailsConvertorPP, @@ -65,7 +65,7 @@ class TestExec(unittest.TestCase): def test_parse_cmd(self): pp = ExecPP(YoutubeDL(), '') info = {'filepath': 'file name'} - cmd = 'echo %s' % compat_shlex_quote(info['filepath']) + cmd = 'echo {}'.format(shell_quote(info['filepath'])) self.assertEqual(pp.parse_cmd('echo', info), cmd) self.assertEqual(pp.parse_cmd('echo {}', info), cmd) @@ -125,7 +125,8 @@ def test_remove_marked_arrange_sponsors_CanGetThroughUnaltered(self): self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(30, 40, 'preview'), self._sponsor_chapter(50, 60, 'filler')] @@ -136,7 +137,8 @@ def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_SponsorBlockChapters(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'chapter', title='sb c1'), self._sponsor_chapter(15, 16, 'chapter', title='sb c2'), self._sponsor_chapter(30, 40, 'preview'), @@ -149,10 +151,14 @@ def test_remove_marked_arrange_sponsors_SponsorBlockChapters(self): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self): - chapters = self._chapters([120], ['c']) + [ - self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'), - self._sponsor_chapter(50, 70, 'sponsor'), self._sponsor_chapter(60, 85, 'selfpromo'), - self._sponsor_chapter(90, 120, 'selfpromo'), self._sponsor_chapter(100, 110, 'sponsor')] + chapters = [ + *self._chapters([120], ['c']), + self._sponsor_chapter(10, 45, 'sponsor'), + self._sponsor_chapter(20, 40, 'selfpromo'), + 
self._sponsor_chapter(50, 70, 'sponsor'), + self._sponsor_chapter(60, 85, 'selfpromo'), + self._sponsor_chapter(90, 120, 'selfpromo'), + self._sponsor_chapter(100, 110, 'sponsor')] expected = self._chapters( [10, 20, 40, 45, 50, 60, 70, 85, 90, 100, 110, 120], ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', @@ -172,7 +178,8 @@ def test_remove_marked_arrange_sponsors_ChapterWithCuts(self): chapters, self._chapters([40], ['c']), cuts) def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(30, 40, 'selfpromo', remove=True), self._sponsor_chapter(50, 60, 'interaction')] @@ -185,24 +192,29 @@ def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self): def test_remove_marked_arrange_sponsors_ChapterWithSponsorCutInTheMiddle(self): cuts = [self._sponsor_chapter(20, 30, 'selfpromo', remove=True), self._chapter(40, 50, remove=True)] - chapters = self._chapters([70], ['c']) + [self._sponsor_chapter(10, 60, 'sponsor')] + cuts + chapters = [ + *self._chapters([70], ['c']), + self._sponsor_chapter(10, 60, 'sponsor'), + *cuts] expected = self._chapters( [10, 40, 50], ['c', '[SponsorBlock]: Sponsor', 'c']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self): cuts = [self._sponsor_chapter(20, 50, 'selfpromo', remove=True)] - chapters = self._chapters([60], ['c']) + [ + chapters = [ + *self._chapters([60], ['c']), self._sponsor_chapter(10, 20, 'intro'), self._sponsor_chapter(30, 40, 'sponsor'), self._sponsor_chapter(50, 60, 'outro'), - ] + cuts + *cuts] expected = self._chapters( [10, 20, 30], ['c', '[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def 
test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(20, 30, 'selfpromo'), self._sponsor_chapter(30, 40, 'interaction')] @@ -213,7 +225,8 @@ def test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(20, 30, 'interaction', remove=True), self._chapter(30, 40, remove=True), @@ -226,7 +239,8 @@ def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self): chapters, expected, [self._chapter(20, 50, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 30, 'sponsor'), self._sponsor_chapter(20, 50, 'selfpromo'), self._sponsor_chapter(40, 60, 'interaction')] @@ -238,7 +252,8 @@ def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 30, 'sponsor', remove=True), self._sponsor_chapter(20, 50, 'selfpromo', remove=True), self._sponsor_chapter(40, 60, 'interaction', remove=True)] @@ -246,7 +261,8 @@ def test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self): chapters, self._chapters([20], ['c']), [self._chapter(10, 60, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(self): - chapters = 
self._chapters([170], ['c']) + [ + chapters = [ + *self._chapters([170], ['c']), self._sponsor_chapter(0, 30, 'intro'), self._sponsor_chapter(20, 50, 'sponsor'), self._sponsor_chapter(40, 60, 'selfpromo'), @@ -267,7 +283,8 @@ def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(sel self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self): - chapters = self._chapters([170], ['c']) + [ + chapters = [ + *self._chapters([170], ['c']), self._chapter(0, 30, remove=True), self._sponsor_chapter(20, 50, 'sponsor', remove=True), self._chapter(40, 60, remove=True), @@ -284,7 +301,8 @@ def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self): chapters, self._chapters([20], ['c']), expected_cuts) def test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterCut(self): - chapters = self._chapters([60], ['c']) + [ + chapters = [ + *self._chapters([60], ['c']), self._sponsor_chapter(10, 60, 'sponsor'), self._sponsor_chapter(10, 40, 'intro'), self._sponsor_chapter(30, 50, 'interaction'), @@ -297,7 +315,8 @@ def test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterC chapters, expected, [self._chapter(30, 50, remove=True)]) def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 30, 'sponsor'), self._sponsor_chapter(20, 50, 'interaction'), self._sponsor_chapter(30, 50, 'selfpromo', remove=True), @@ -310,7 +329,8 @@ def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self): chapters, expected, [self._chapter(30, 50, remove=True)]) def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 60, 'sponsor'), 
self._sponsor_chapter(20, 60, 'interaction'), self._sponsor_chapter(30, 50, 'selfpromo', remove=True)] @@ -321,7 +341,8 @@ def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self): chapters, expected, [self._chapter(30, 50, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndCuts(self): - chapters = self._chapters([200], ['c']) + [ + chapters = [ + *self._chapters([200], ['c']), self._sponsor_chapter(10, 40, 'sponsor'), self._sponsor_chapter(10, 30, 'intro'), self._chapter(20, 30, remove=True), @@ -347,8 +368,9 @@ def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndC self._remove_marked_arrange_sponsors_test_impl(chapters, expected, expected_cuts) def test_remove_marked_arrange_sponsors_SponsorOverlapsMultipleChapters(self): - chapters = (self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) - + [self._sponsor_chapter(10, 90, 'sponsor')]) + chapters = [ + *self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']), + self._sponsor_chapter(10, 90, 'sponsor')] expected = self._chapters([10, 90, 100], ['c1', '[SponsorBlock]: Sponsor', 'c5']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -359,9 +381,10 @@ def test_remove_marked_arrange_sponsors_CutOverlapsMultipleChapters(self): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorsWithinSomeChaptersAndOverlappingOthers(self): - chapters = (self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) - + [self._sponsor_chapter(20, 30, 'sponsor'), - self._sponsor_chapter(50, 70, 'selfpromo')]) + chapters = [ + *self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']), + self._sponsor_chapter(20, 30, 'sponsor'), + self._sponsor_chapter(50, 70, 'selfpromo')] expected = self._chapters([10, 20, 30, 40, 50, 70, 80], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c2', 'c3', '[SponsorBlock]: Unpaid/Self Promotion', 'c4']) @@ 
-374,8 +397,9 @@ def test_remove_marked_arrange_sponsors_CutsWithinSomeChaptersAndOverlappingOthe self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_ChaptersAfterLastSponsor(self): - chapters = (self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) - + [self._sponsor_chapter(10, 30, 'music_offtopic')]) + chapters = [ + *self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']), + self._sponsor_chapter(10, 30, 'music_offtopic')] expected = self._chapters( [10, 30, 40, 50, 60], ['c1', '[SponsorBlock]: Non-Music Section', 'c2', 'c3', 'c4']) @@ -388,8 +412,9 @@ def test_remove_marked_arrange_sponsors_ChaptersAfterLastCut(self): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorStartsAtChapterStart(self): - chapters = (self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) - + [self._sponsor_chapter(20, 30, 'sponsor')]) + chapters = [ + *self._chapters([10, 20, 40], ['c1', 'c2', 'c3']), + self._sponsor_chapter(20, 30, 'sponsor')] expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -400,8 +425,9 @@ def test_remove_marked_arrange_sponsors_CutStartsAtChapterStart(self): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorEndsAtChapterEnd(self): - chapters = (self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) - + [self._sponsor_chapter(20, 30, 'sponsor')]) + chapters = [ + *self._chapters([10, 30, 40], ['c1', 'c2', 'c3']), + self._sponsor_chapter(20, 30, 'sponsor')] expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -412,8 +438,9 @@ def test_remove_marked_arrange_sponsors_CutEndsAtChapterEnd(self): self._remove_marked_arrange_sponsors_test_impl(chapters, 
expected, cuts) def test_remove_marked_arrange_sponsors_SponsorCoincidesWithChapters(self): - chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) - + [self._sponsor_chapter(10, 30, 'sponsor')]) + chapters = [ + *self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']), + self._sponsor_chapter(10, 30, 'sponsor')] expected = self._chapters([10, 30, 40], ['c1', '[SponsorBlock]: Sponsor', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -424,8 +451,9 @@ def test_remove_marked_arrange_sponsors_CutCoincidesWithChapters(self): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorsAtVideoBoundaries(self): - chapters = (self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) - + [self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')]) + chapters = [ + *self._chapters([20, 40, 60], ['c1', 'c2', 'c3']), + self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')] expected = self._chapters( [10, 20, 40, 50, 60], ['[SponsorBlock]: Intermission/Intro Animation', 'c1', 'c2', 'c3', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -437,8 +465,10 @@ def test_remove_marked_arrange_sponsors_CutsAtVideoBoundaries(self): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorsOverlapChaptersAtVideoBoundaries(self): - chapters = (self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) - + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(30, 50, 'outro')]) + chapters = [ + *self._chapters([10, 40, 50], ['c1', 'c2', 'c3']), + self._sponsor_chapter(0, 20, 'intro'), + self._sponsor_chapter(30, 50, 'outro')] expected = self._chapters( [20, 30, 50], ['[SponsorBlock]: Intermission/Intro Animation', 'c2', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) 
@@ -450,8 +480,10 @@ def test_remove_marked_arrange_sponsors_CutsOverlapChaptersAtVideoBoundaries(sel self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_EverythingSponsored(self): - chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) - + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(20, 40, 'outro')]) + chapters = [ + *self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']), + self._sponsor_chapter(0, 20, 'intro'), + self._sponsor_chapter(20, 40, 'outro')] expected = self._chapters([20, 40], ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -491,38 +523,39 @@ def test_remove_marked_arrange_sponsors_TinyChapterAtTheStartPrependedToTheNext( chapters, self._chapters([2.5], ['c2']), cuts) def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self): - chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [ + chapters = [ + *self._chapters([1, 3, 4], ['c1', 'c2', 'c3']), self._sponsor_chapter(1.5, 2.5, 'sponsor')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self): - chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [ + chapters = [ + *self._chapters([2, 3, 5], ['c1', 'c2', 'c3']), self._sponsor_chapter(1, 3, 'sponsor'), - self._sponsor_chapter(2.5, 4, 'selfpromo') - ] + self._sponsor_chapter(2.5, 4, 'selfpromo')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([1, 3, 4, 5], [ 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), []) def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self): - chapters = self._chapters([4], ['c']) + [ + chapters = [ + *self._chapters([4], ['c']), 
self._sponsor_chapter(1.5, 2, 'sponsor'), - self._sponsor_chapter(2, 4, 'selfpromo') - ] + self._sponsor_chapter(2, 4, 'selfpromo')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), []) def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self): self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s' - chapters = self._chapters([10], ['c']) + [ + chapters = [ + *self._chapters([10], ['c']), self._sponsor_chapter(2, 8, 'sponsor'), - self._sponsor_chapter(4, 6, 'selfpromo') - ] + self._sponsor_chapter(4, 6, 'selfpromo')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([2, 4, 6, 8, 10], [ 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', - '[SponsorBlock]: Sponsor', 'c' + '[SponsorBlock]: Sponsor', 'c', ]), []) def test_make_concat_opts_CommonCase(self): diff --git a/test/test_socks.py b/test/test_socks.py index 43d612d85d17..68af19d0ca44 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -95,7 +95,7 @@ def handle(self): return elif Socks5Auth.AUTH_USER_PASS in methods: - self.connection.sendall(struct.pack("!BB", SOCKS5_VERSION, Socks5Auth.AUTH_USER_PASS)) + self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_USER_PASS)) _, user_len = struct.unpack('!BB', self.connection.recv(2)) username = self.connection.recv(user_len).decode() @@ -174,7 +174,7 @@ def handle(self): if 0x0 < dest_ip <= 0xFF: use_remote_dns = True else: - socks_info['ipv4_address'] = socket.inet_ntoa(struct.pack("!I", dest_ip)) + socks_info['ipv4_address'] = socket.inet_ntoa(struct.pack('!I', dest_ip)) user_id = self._read_until_null().decode() if user_id != (self.socks_kwargs.get('user_id') or ''): @@ -291,7 +291,7 @@ def ctx(request): ('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws'), - ('CurlCFFI', 'http') + ('CurlCFFI', 'http'), ], indirect=True) class TestSocks4Proxy: def 
test_socks4_no_auth(self, handler, ctx): @@ -366,7 +366,7 @@ def test_timeout(self, handler, ctx): ('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws'), - ('CurlCFFI', 'http') + ('CurlCFFI', 'http'), ], indirect=True) class TestSocks5Proxy: diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 57362895f10a..f3b00561791d 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -40,12 +40,11 @@ def setUp(self): self.ie = self.IE() self.DL.add_info_extractor(self.ie) if not self.IE.working(): - print('Skipping: %s marked as not _WORKING' % self.IE.ie_key()) + print(f'Skipping: {self.IE.ie_key()} marked as not _WORKING') self.skipTest('IE marked as not _WORKING') def getInfoDict(self): - info_dict = self.DL.extract_info(self.url, download=False) - return info_dict + return self.DL.extract_info(self.url, download=False) def getSubtitles(self): info_dict = self.getInfoDict() @@ -87,7 +86,7 @@ def test_youtube_allsubtitles(self): self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d') self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9') for lang in ['fr', 'de']: - self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + self.assertTrue(subtitles.get(lang) is not None, f'Subtitles for \'{lang}\' not extracted') def _test_subtitles_format(self, fmt, md5_hash, lang='en'): self.DL.params['writesubtitles'] = True @@ -157,7 +156,7 @@ def test_allsubtitles(self): self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') for lang in ['es', 'fr', 'de']: - self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + self.assertTrue(subtitles.get(lang) is not None, f'Subtitles for \'{lang}\' not extracted') def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') @@ -182,7 +181,7 @@ def test_allsubtitles(self): 
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') for lang in ['es', 'fr', 'de']: - self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + self.assertTrue(subtitles.get(lang) is not None, f'Subtitles for \'{lang}\' not extracted') @is_download_test diff --git a/test/test_traversal.py b/test/test_traversal.py index 9b2a27b0807f..5d9fbe1d162a 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -31,7 +31,7 @@ def test_traversal_base(self): 'allow tuple path' assert traverse_obj(_TEST_DATA, ['str']) == 'str', \ 'allow list path' - assert traverse_obj(_TEST_DATA, (value for value in ("str",))) == 'str', \ + assert traverse_obj(_TEST_DATA, (value for value in ('str',))) == 'str', \ 'allow iterable path' assert traverse_obj(_TEST_DATA, 'str') == 'str', \ 'single items should be treated as a path' @@ -70,7 +70,7 @@ def test_traversal_function(self): def test_traversal_set(self): # transformation/type, like `expected_type` - assert traverse_obj(_TEST_DATA, (..., {str.upper}, )) == ['STR'], \ + assert traverse_obj(_TEST_DATA, (..., {str.upper})) == ['STR'], \ 'Function in set should be a transformation' assert traverse_obj(_TEST_DATA, (..., {str})) == ['str'], \ 'Type in set should be a type filter' @@ -276,7 +276,7 @@ def test_traversal_traverse_string(self): '`...` should result in string (same value) if `traverse_string`' assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), traverse_string=True) == 'sr', \ '`slice` should result in string if `traverse_string`' - assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"), traverse_string=True) == 'str', \ + assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'), traverse_string=True) == 'str', \ 'function should result in string if `traverse_string`' assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 
2)), traverse_string=True) == ['s', 'r'], \ 'branching should result in list if `traverse_string`' diff --git a/test/test_update.py b/test/test_update.py index bc139562f4a4..63a21e445fc5 100644 --- a/test/test_update.py +++ b/test/test_update.py @@ -78,11 +78,11 @@ TEST_LOCKFILE_COMMENT = '# This file is used for regulating self-update' -TEST_LOCKFILE_V1 = r'''%s +TEST_LOCKFILE_V1 = rf'''{TEST_LOCKFILE_COMMENT} lock 2022.08.18.36 .+ Python 3\.6 lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) -''' % TEST_LOCKFILE_COMMENT +''' TEST_LOCKFILE_V2_TMPL = r'''%s lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 @@ -98,12 +98,12 @@ TEST_LOCKFILE_ACTUAL = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_V1.rstrip('\n') -TEST_LOCKFILE_FORK = r'''%s# Test if a fork blocks updates to non-numeric tags +TEST_LOCKFILE_FORK = rf'''{TEST_LOCKFILE_ACTUAL}# Test if a fork blocks updates to non-numeric tags lockV2 fork/yt-dlp pr0000 .+ Python 3.6 lockV2 fork/yt-dlp pr1234 (?!win_x86_exe).+ Python 3\.7 lockV2 fork/yt-dlp pr1234 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 fork/yt-dlp pr9999 .+ Python 3.11 -''' % TEST_LOCKFILE_ACTUAL +''' class FakeUpdater(Updater): diff --git a/test/test_utils.py b/test/test_utils.py index 77fadbbeab37..251739686efe 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -276,8 +276,8 @@ def env(var): self.assertEqual(expand_path(env('HOME')), os.getenv('HOME')) self.assertEqual(expand_path('~'), os.getenv('HOME')) self.assertEqual( - expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), - '%s/expanded' % os.getenv('HOME')) + expand_path('~/{}'.format(env('yt_dlp_EXPATH_PATH'))), + '{}/expanded'.format(os.getenv('HOME'))) finally: os.environ['HOME'] = old_home or '' @@ -356,12 +356,12 @@ def test_datetime_from_str(self): self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto')) def test_daterange(self): - _20century = DateRange("19000101", 
"20000101") - self.assertFalse("17890714" in _20century) - _ac = DateRange("00010101") - self.assertTrue("19690721" in _ac) - _firstmilenium = DateRange(end="10000101") - self.assertTrue("07110427" in _firstmilenium) + _20century = DateRange('19000101', '20000101') + self.assertFalse('17890714' in _20century) + _ac = DateRange('00010101') + self.assertTrue('19690721' in _ac) + _firstmilenium = DateRange(end='10000101') + self.assertTrue('07110427' in _firstmilenium) def test_unified_dates(self): self.assertEqual(unified_strdate('December 21, 2010'), '20101221') @@ -506,7 +506,7 @@ def test_xpath_attr(self): self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True) def test_smuggle_url(self): - data = {"ö": "ö", "abc": [3]} + data = {'ö': 'ö', 'abc': [3]} url = 'https://foo.bar/baz?x=y#a' smug_url = smuggle_url(url, data) unsmug_url, unsmug_data = unsmuggle_url(smug_url) @@ -784,7 +784,7 @@ def test_parse_iso8601(self): def test_strip_jsonp(self): stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);') d = json.loads(stripped) - self.assertEqual(d, [{"id": "532cb", "x": 3}]) + self.assertEqual(d, [{'id': '532cb', 'x': 3}]) stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc') d = json.loads(stripped) @@ -922,19 +922,19 @@ def test_escape_rfc3986(self): def test_normalize_url(self): self.assertEqual( normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), - 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4' + 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4', ) self.assertEqual( normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), - 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290' + 
'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290', ) self.assertEqual( normalize_url('http://тест.рф/фрагмент'), - 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' + 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82', ) self.assertEqual( normalize_url('http://тест.рф/абв?абв=абв#абв'), - 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' + 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2', ) self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') @@ -979,7 +979,7 @@ def test_js_to_json_vars_strings(self): 'e': 'false', 'f': '"false"', 'g': 'var', - } + }, )), { 'null': None, @@ -988,8 +988,8 @@ def test_js_to_json_vars_strings(self): 'trueStr': 'true', 'false': False, 'falseStr': 'false', - 'unresolvedVar': 'var' - } + 'unresolvedVar': 'var', + }, ) self.assertDictEqual( @@ -1005,14 +1005,14 @@ def test_js_to_json_vars_strings(self): 'b': '"123"', 'c': '1.23', 'd': '"1.23"', - } + }, )), { 'int': 123, 'intStr': '123', 'float': 1.23, 'floatStr': '1.23', - } + }, ) self.assertDictEqual( @@ -1028,14 +1028,14 @@ def test_js_to_json_vars_strings(self): 'b': '"{}"', 'c': '[]', 'd': '"[]"', - } + }, )), { 'object': {}, 'objectStr': '{}', 'array': [], 'arrayStr': '[]', - } + }, ) def test_js_to_json_realworld(self): @@ -1081,7 +1081,7 @@ def test_js_to_json_realworld(self): def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") - self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) + self.assertEqual(json.loads(on), {'abc_def': "1'\\2\\'3\"4"}) on = js_to_json('{"abc": true}') self.assertEqual(json.loads(on), {'abc': True}) @@ -1113,9 +1113,9 @@ def test_js_to_json_edgecases(self): 'c': 0, 'd': 42.42, 'e': [], - 'f': "abc", - 'g': "", - 
'42': 42 + 'f': 'abc', + 'g': '', + '42': 42, }) on = js_to_json('["abc", "def",]') @@ -1209,8 +1209,8 @@ def test_js_to_json_common_constructors(self): self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10]) self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5]) self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5}) - self.assertEqual(json.loads(js_to_json('new Date("123")')), "123") - self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), "2023-10-19") + self.assertEqual(json.loads(js_to_json('new Date("123")')), '123') + self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), '2023-10-19') def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) @@ -1265,7 +1265,7 @@ def test_intlist_to_bytes(self): def test_args_to_str(self): self.assertEqual( args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), - 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""' + 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""', ) def test_parse_filesize(self): @@ -1348,10 +1348,10 @@ def test_is_html(self): self.assertTrue(is_html( # UTF-8 with BOM b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa')) self.assertTrue(is_html( # UTF-16-LE - b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00' + b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00', )) self.assertTrue(is_html( # UTF-16-BE - b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4' + b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4', )) self.assertTrue(is_html( # UTF-32-BE b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4')) @@ -1935,7 +1935,7 @@ def test_locked_file(self): with locked_file(FILE, test_mode, False): pass except (BlockingIOError, PermissionError): - if not testing_write: # FIXME + if not testing_write: # FIXME: blocked read access print(f'Known issue: Exclusive lock 
({lock_mode}) blocks read access ({test_mode})') continue self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}') @@ -2003,7 +2003,7 @@ def total(*x, **kwargs): msg='int fn with expected_type int should give int') self.assertEqual(try_call(lambda: 1, expected_type=dict), None, msg='int fn with wrong expected_type should give None') - self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1, + self.assertEqual(try_call(total, args=(0, 1, 0), expected_type=int), 1, msg='fn should accept arglist') self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1, msg='fn should accept kwargs') diff --git a/test/test_websockets.py b/test/test_websockets.py index aa0dfa2d5570..5f101abcc6a0 100644 --- a/test/test_websockets.py +++ b/test/test_websockets.py @@ -297,14 +297,14 @@ def test_request_headers(self, handler): 'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'), 'client_certificate_key': os.path.join(MTLS_CERT_DIR, 'clientencrypted.key'), 'client_certificate_password': 'foobar', - } + }, )) def test_mtls(self, handler, client_cert): with handler( # Disable client-side validation of unacceptable self-signed testcert.pem # The test is of a check on the server side, so unaffected verify=False, - client_cert=client_cert + client_cert=client_cert, ) as rh: ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close() diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py index 81be5d3c9e1d..81b116217a31 100644 --- a/test/test_youtube_misc.py +++ b/test/test_youtube_misc.py @@ -13,7 +13,7 @@ class TestYoutubeMisc(unittest.TestCase): def test_youtube_extract(self): - assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) + assertExtractId = lambda url, video_id: self.assertEqual(YoutubeIE.extract_id(url), video_id) assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 
'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index c5592845b3d9..bfaff83a0aa5 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -46,17 +46,17 @@ ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', 84, - '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' + '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', 83, - '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' + '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js', '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', - '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' + '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', @@ -207,7 +207,7 @@ def tearDown(self): def t_factory(name, sig_func, url_pattern): def make_tfunc(url, sig_input, expected_sig): m = url_pattern.match(url) - assert m, '%r should follow URL format' % url + assert m, f'{url!r} should follow URL format' test_id = m.group('id') def test_func(self): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c6f695d091f..5abcb4635c41 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -109,7 +109,6 @@ determine_protocol, encode_compat_str, encodeFilename, - error_to_compat_str, escapeHTML, expand_path, extract_basic_auth, @@ -583,7 +582,7 @@ class YoutubeDL: 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', 'preference', 
'language', 'language_preference', 'quality', 'source_preference', 'cookies', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', - 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' + 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', } _deprecated_multivalue_fields = { 'album_artist': 'album_artists', @@ -594,7 +593,7 @@ class YoutubeDL: } _format_selection_exts = { 'audio': set(MEDIA_EXTENSIONS.common_audio), - 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), + 'video': {*MEDIA_EXTENSIONS.common_video, '3gp'}, 'storyboards': set(MEDIA_EXTENSIONS.storyboards), } @@ -628,7 +627,7 @@ def __init__(self, params=None, auto_init=True): error=sys.stderr, screen=sys.stderr if self.params.get('quiet') else stdout, console=None if compat_os_name == 'nt' else next( - filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) + filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), ) try: @@ -679,9 +678,9 @@ def process_color_policy(stream): width_args = [] if width is None else ['-w', str(width)] sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} try: - self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) + self._output_process = Popen(['bidiv', *width_args], **sp_kwargs) except OSError: - self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: @@ -822,8 +821,7 @@ def warn_if_short_id(self, argv): ) self.report_warning( 'Long argument string detected. 
' - 'Use -- to separate parameters and URLs, like this:\n%s' % - shell_quote(correct_argv)) + f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}') def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -922,7 +920,7 @@ def to_screen(self, message, skip_eol=False, quiet=None, only_once=False): if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'): return self._write_string( - '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), + '{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')), self._out_files.screen, only_once=only_once) def to_stderr(self, message, only_once=False): @@ -1045,10 +1043,10 @@ def _format_err(self, *args, **kwargs): return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs) def report_warning(self, message, only_once=False): - ''' + """ Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored - ''' + """ if self.params.get('logger') is not None: self.params['logger'].warning(message) else: @@ -1066,14 +1064,14 @@ def deprecated_feature(self, message): self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True) def report_error(self, message, *args, **kwargs): - ''' + """ Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. 
- ''' + """ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) def write_debug(self, message, only_once=False): - '''Log debug message or Print message to stderr''' + """Log debug message or Print message to stderr""" if not self.params.get('verbose', False): return message = f'[debug] {message}' @@ -1085,14 +1083,14 @@ def write_debug(self, message, only_once=False): def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: - self.to_screen('[download] %s has already been downloaded' % file_name) + self.to_screen(f'[download] {file_name} has already been downloaded') except UnicodeEncodeError: self.to_screen('[download] The file has already been downloaded') def report_file_delete(self, file_name): """Report that existing file will be deleted.""" try: - self.to_screen('Deleting existing file %s' % file_name) + self.to_screen(f'Deleting existing file {file_name}') except UnicodeEncodeError: self.to_screen('Deleting existing file') @@ -1147,7 +1145,7 @@ def _outtmpl_expandpath(outtmpl): @staticmethod def escape_outtmpl(outtmpl): - ''' Escape any remaining strings like %s, %abc% etc. ''' + """ Escape any remaining strings like %s, %abc% etc. """ return re.sub( STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'), lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0), @@ -1155,7 +1153,7 @@ def escape_outtmpl(outtmpl): @classmethod def validate_outtmpl(cls, outtmpl): - ''' @return None or Exception object ''' + """ @return None or Exception object """ outtmpl = re.sub( STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'), lambda mobj: f'{mobj.group(0)[:-1]}s', @@ -1208,13 +1206,13 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): } # Field is of the form key1.key2... 
# where keys (except first) can be string, int, slice or "{field, ...}" - FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} - FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { + FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} # noqa: UP031 + FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { # noqa: UP031 'inner': FIELD_INNER_RE, - 'field': rf'\w*(?:\.{FIELD_INNER_RE})*' + 'field': rf'\w*(?:\.{FIELD_INNER_RE})*', } MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})' - MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) + MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys()))) INTERNAL_FORMAT_RE = re.compile(rf'''(?xs) (?P<negate>-)? (?P<fields>{FIELD_RE}) @@ -1337,7 +1335,7 @@ def create_key(outer_mobj): value, default = None, na fmt = outer_mobj.group('format') - if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int): + if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int): fmt = f'0{field_size_compat_map[last_field]:d}d' flags = outer_mobj.group('conversion') or '' @@ -1362,7 +1360,7 @@ def create_key(outer_mobj): elif fmt[-1] == 'U': # unicode normalized value, fmt = unicodedata.normalize( # "+" = compatibility equivalence, "#" = NFD - 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), + 'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), value), str_fmt elif fmt[-1] == 'D': # decimal suffix num_fmt, fmt = fmt[:-1].replace('#', ''), 's' @@ -1390,7 +1388,7 @@ def create_key(outer_mobj): if fmt[-1] in 'csra': value = sanitizer(last_field, value) - key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format')) + key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, 
prefix=outer_mobj.group('prefix')) @@ -1479,9 +1477,9 @@ def check_filter(): date = info_dict.get('upload_date') if date is not None: - dateRange = self.params.get('daterange', DateRange()) - if date not in dateRange: - return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}' + date_range = self.params.get('daterange', DateRange()) + if date not in date_range: + return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}' view_count = info_dict.get('view_count') if view_count is not None: min_views = self.params.get('min_views') @@ -1491,7 +1489,7 @@ def check_filter(): if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): - return 'Skipping "%s" because it is age restricted' % video_title + return f'Skipping "{video_title}" because it is age restricted' match_filter = self.params.get('match_filter') if match_filter is None: @@ -1544,7 +1542,7 @@ def check_filter(): @staticmethod def add_extra_info(info_dict, extra_info): - '''Set the keys from extra_info in info dict if they are missing''' + """Set the keys from extra_info in info dict if they are missing""" for key, value in extra_info.items(): info_dict.setdefault(key, value) @@ -1590,7 +1588,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None, self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: ' 'has already been recorded in the archive') if self.params.get('break_on_existing', False): - raise ExistingVideoReached() + raise ExistingVideoReached break return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process) else: @@ -1616,8 +1614,8 @@ def wrapper(self, *args, **kwargs): except GeoRestrictedError as e: msg = e.msg if e.countries: - msg += '\nThis video is available in %s.' 
% ', '.join( - map(ISO3166Utils.short2full, e.countries)) + msg += '\nThis video is available in {}.'.format(', '.join( + map(ISO3166Utils.short2full, e.countries))) msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' self.report_error(msg) except ExtractorError as e: # An error we somewhat expected @@ -1826,8 +1824,8 @@ def process_ie_result(self, ie_result, download=True, extra_info=None): if isinstance(additional_urls, str): additional_urls = [additional_urls] self.to_screen( - '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls))) - self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls)) + '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls))) + self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls))) ie_result['additional_entries'] = [ self.extract_info( url, download, extra_info=extra_info, @@ -1879,8 +1877,8 @@ def process_ie_result(self, ie_result, download=True, extra_info=None): webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url if webpage_url and webpage_url in self._playlist_urls: self.to_screen( - '[download] Skipping already downloaded playlist: %s' - % ie_result.get('title') or ie_result.get('id')) + '[download] Skipping already downloaded playlist: {}'.format( + ie_result.get('title')) or ie_result.get('id')) return self._playlist_level += 1 @@ -1895,8 +1893,8 @@ def process_ie_result(self, ie_result, download=True, extra_info=None): self._playlist_urls.clear() elif result_type == 'compat_list': self.report_warning( - 'Extractor %s returned a compat_list result. ' - 'It needs to be updated.' % ie_result.get('extractor')) + 'Extractor {} returned a compat_list result. 
' + 'It needs to be updated.'.format(ie_result.get('extractor'))) def _fixup(r): self.add_extra_info(r, { @@ -1913,7 +1911,7 @@ def _fixup(r): ] return ie_result else: - raise Exception('Invalid result type: %s' % result_type) + raise Exception(f'Invalid result type: {result_type}') def _ensure_dir_exists(self, path): return make_dir(path, self.report_error) @@ -2029,8 +2027,9 @@ def __process_playlist(self, ie_result, download): resolved_entries[i] = (playlist_index, NO_DEFAULT) continue - self.to_screen('[download] Downloading item %s of %s' % ( - self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) + self.to_screen( + f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} ' + f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}') entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, @@ -2080,9 +2079,9 @@ def _build_format_filter(self, filter_spec): } operator_rex = re.compile(r'''(?x)\s* (?P<key>[\w.-]+)\s* - (?P<op>%s)(?P<none_inclusive>\s*\?)?\s* + (?P<op>{})(?P<none_inclusive>\s*\?)?\s* (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s* - ''' % '|'.join(map(re.escape, OPERATORS.keys()))) + '''.format('|'.join(map(re.escape, OPERATORS.keys())))) m = operator_rex.fullmatch(filter_spec) if m: try: @@ -2093,7 +2092,7 @@ def _build_format_filter(self, filter_spec): comparison_value = parse_filesize(m.group('value') + 'B') if comparison_value is None: raise ValueError( - 'Invalid value %r in format specification %r' % ( + 'Invalid value {!r} in format specification {!r}'.format( m.group('value'), filter_spec)) op = OPERATORS[m.group('op')] @@ -2103,15 +2102,15 @@ def _build_format_filter(self, filter_spec): '^=': lambda attr, value: attr.startswith(value), '$=': lambda attr, value: attr.endswith(value), '*=': lambda attr, value: value in attr, - '~=': lambda attr, value: value.search(attr) is not None + '~=': lambda attr, value: 
value.search(attr) is not None, } str_operator_rex = re.compile(r'''(?x)\s* (?P<key>[a-zA-Z0-9._-]+)\s* - (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)? + (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)? (?P<quote>["'])? (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+)) (?(quote)(?P=quote))\s* - ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) + '''.format('|'.join(map(re.escape, STR_OPERATORS.keys())))) m = str_operator_rex.fullmatch(filter_spec) if m: if m.group('op') == '~=': @@ -2125,7 +2124,7 @@ def _build_format_filter(self, filter_spec): op = str_op if not m: - raise SyntaxError('Invalid filter specification %r' % filter_spec) + raise SyntaxError(f'Invalid filter specification {filter_spec!r}') def _filter(f): actual_value = f.get(m.group('key')) @@ -2141,7 +2140,7 @@ def _check_formats(self, formats): if working: yield f continue - self.to_screen('[info] Testing format %s' % f['format_id']) + self.to_screen('[info] Testing format {}'.format(f['format_id'])) path = self.get_output_path('temp') if not self._ensure_dir_exists(f'{path}/'): continue @@ -2149,19 +2148,19 @@ def _check_formats(self, formats): temp_file.close() try: success, _ = self.dl(temp_file.name, f, test=True) - except (DownloadError, OSError, ValueError) + network_exceptions: + except (DownloadError, OSError, ValueError, *network_exceptions): success = False finally: if os.path.exists(temp_file.name): try: os.remove(temp_file.name) except OSError: - self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) + self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') f['__working'] = success if success: yield f else: - self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) + self.to_screen('[info] Unable to download format {}. 
Skipping...'.format(f['format_id'])) def _select_formats(self, formats, selector): return list(selector({ @@ -2214,8 +2213,8 @@ def syntax_error(note, start): def _parse_filter(tokens): filter_parts = [] - for type, string_, start, _, _ in tokens: - if type == tokenize.OP and string_ == ']': + for type_, string_, _start, _, _ in tokens: + if type_ == tokenize.OP and string_ == ']': return ''.join(filter_parts) else: filter_parts.append(string_) @@ -2225,23 +2224,23 @@ def _remove_unused_ops(tokens): # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' ALLOWED_OPS = ('/', '+', ',', '(', ')') last_string, last_start, last_end, last_line = None, None, None, None - for type, string_, start, end, line in tokens: - if type == tokenize.OP and string_ == '[': + for type_, string_, start, end, line in tokens: + if type_ == tokenize.OP and string_ == '[': if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string_, start, end, line + yield type_, string_, start, end, line # everything inside brackets will be handled by _parse_filter - for type, string_, start, end, line in tokens: - yield type, string_, start, end, line - if type == tokenize.OP and string_ == ']': + for type_, string_, start, end, line in tokens: + yield type_, string_, start, end, line + if type_ == tokenize.OP and string_ == ']': break - elif type == tokenize.OP and string_ in ALLOWED_OPS: + elif type_ == tokenize.OP and string_ in ALLOWED_OPS: if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string_, start, end, line - elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + yield type_, string_, start, end, line + elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: if not last_string: last_string = string_ last_start = start @@ -2254,13 +2253,13 @@ def _remove_unused_ops(tokens): def _parse_format_selection(tokens, inside_merge=False, 
inside_choice=False, inside_group=False): selectors = [] current_selector = None - for type, string_, start, _, _ in tokens: + for type_, string_, start, _, _ in tokens: # ENCODING is only defined in Python 3.x - if type == getattr(tokenize, 'ENCODING', None): + if type_ == getattr(tokenize, 'ENCODING', None): continue - elif type in [tokenize.NAME, tokenize.NUMBER]: + elif type_ in [tokenize.NAME, tokenize.NUMBER]: current_selector = FormatSelector(SINGLE, string_, []) - elif type == tokenize.OP: + elif type_ == tokenize.OP: if string_ == ')': if not inside_group: # ')' will be handled by the parentheses group @@ -2303,7 +2302,7 @@ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, ins current_selector = FormatSelector(MERGE, (selector_1, selector_2), []) else: raise syntax_error(f'Operator not recognized: "{string_}"', start) - elif type == tokenize.ENDMARKER: + elif type_ == tokenize.ENDMARKER: break if current_selector: selectors.append(current_selector) @@ -2378,7 +2377,7 @@ def _merge(formats_pair): 'acodec': the_only_audio.get('acodec'), 'abr': the_only_audio.get('abr'), 'asr': the_only_audio.get('asr'), - 'audio_channels': the_only_audio.get('audio_channels') + 'audio_channels': the_only_audio.get('audio_channels'), }) return new_dict @@ -2459,9 +2458,9 @@ def selector_function(ctx): format_fallback = not format_type and not format_modified # for b, w _filter_f = ( - (lambda f: f.get('%scodec' % format_type) != 'none') + (lambda f: f.get(f'{format_type}codec') != 'none') if format_type and format_modified # bv*, ba*, wv*, wa* - else (lambda f: f.get('%scodec' % not_format_type) == 'none') + else (lambda f: f.get(f'{not_format_type}codec') == 'none') if format_type # bv, ba, wv, wa else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none') if not format_modified # b, w @@ -2529,7 +2528,7 @@ def __iter__(self): def __next__(self): if self.counter >= len(self.tokens): - raise StopIteration() + raise StopIteration value = 
self.tokens[self.counter] self.counter += 1 return value @@ -2612,7 +2611,7 @@ def check_thumbnails(thumbnails): self._sort_thumbnails(thumbnails) for i, t in enumerate(thumbnails): if t.get('id') is None: - t['id'] = '%d' % i + t['id'] = str(i) if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) t['url'] = sanitize_url(t['url']) @@ -2673,8 +2672,8 @@ def _fill_common_fields(self, info_dict, final=True): # Auto generate title fields corresponding to the *_number fields when missing # in order to always have clean titles. This is very common for TV series. for field in ('chapter', 'season', 'episode'): - if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): - info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number']) for old_key, new_key in self._deprecated_multivalue_fields.items(): if new_key in info_dict and old_key in info_dict: @@ -2706,8 +2705,8 @@ def process_video_result(self, info_dict, download=True): def report_force_conversion(field, field_not, conversion): self.report_warning( - '"%s" field is not %s - forcing %s conversion, there is an error in extractor' - % (field, field_not, conversion)) + f'"{field}" field is not {field_not} - forcing {conversion} conversion, ' + 'there is an error in extractor') def sanitize_string_field(info, string_field): field = info.get(string_field) @@ -2824,28 +2823,28 @@ def is_wellformed(f): if not formats: self.raise_no_formats(info_dict) - for format in formats: - sanitize_string_field(format, 'format_id') - sanitize_numeric_fields(format) - format['url'] = sanitize_url(format['url']) - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() - if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): - if 
format.get('acodec') is None: - format['acodec'] = format['ext'] - if format.get('protocol') is None: - format['protocol'] = determine_protocol(format) - if format.get('resolution') is None: - format['resolution'] = self.format_resolution(format, default=None) - if format.get('dynamic_range') is None and format.get('vcodec') != 'none': - format['dynamic_range'] = 'SDR' - if format.get('aspect_ratio') is None: - format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2)) + for fmt in formats: + sanitize_string_field(fmt, 'format_id') + sanitize_numeric_fields(fmt) + fmt['url'] = sanitize_url(fmt['url']) + if fmt.get('ext') is None: + fmt['ext'] = determine_ext(fmt['url']).lower() + if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): + if fmt.get('acodec') is None: + fmt['acodec'] = fmt['ext'] + if fmt.get('protocol') is None: + fmt['protocol'] = determine_protocol(fmt) + if fmt.get('resolution') is None: + fmt['resolution'] = self.format_resolution(fmt, default=None) + if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': + fmt['dynamic_range'] = 'SDR' + if fmt.get('aspect_ratio') is None: + fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2)) # For fragmented formats, "tbr" is often max bitrate and not average - if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url')) - and not format.get('filesize') and not format.get('filesize_approx')): - format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration')) - format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True) + if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url')) + and not fmt.get('filesize') and not fmt.get('filesize_approx')): + fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration')) + fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, 
info_dict), load_cookies=True) # Safeguard against old/insecure infojson when using --load-info-json if info_dict.get('http_headers'): @@ -2858,36 +2857,36 @@ def is_wellformed(f): self.sort_formats({ 'formats': formats, - '_format_sort_fields': info_dict.get('_format_sort_fields') + '_format_sort_fields': info_dict.get('_format_sort_fields'), }) # Sanitize and group by format_id formats_dict = {} - for i, format in enumerate(formats): - if not format.get('format_id'): - format['format_id'] = str(i) + for i, fmt in enumerate(formats): + if not fmt.get('format_id'): + fmt['format_id'] = str(i) else: # Sanitize format_id from characters used in format selector expression - format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) - formats_dict.setdefault(format['format_id'], []).append(format) + fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id']) + formats_dict.setdefault(fmt['format_id'], []).append(fmt) # Make sure all formats have unique format_id common_exts = set(itertools.chain(*self._format_selection_exts.values())) for format_id, ambiguous_formats in formats_dict.items(): ambigious_id = len(ambiguous_formats) > 1 - for i, format in enumerate(ambiguous_formats): + for i, fmt in enumerate(ambiguous_formats): if ambigious_id: - format['format_id'] = '%s-%d' % (format_id, i) + fmt['format_id'] = f'{format_id}-{i}' # Ensure there is no conflict between id and ext in format selection # See https://github.com/yt-dlp/yt-dlp/issues/1282 - if format['format_id'] != format['ext'] and format['format_id'] in common_exts: - format['format_id'] = 'f%s' % format['format_id'] - - if format.get('format') is None: - format['format'] = '{id} - {res}{note}'.format( - id=format['format_id'], - res=self.format_resolution(format), - note=format_field(format, 'format_note', ' (%s)'), + if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts: + fmt['format_id'] = 'f{}'.format(fmt['format_id']) + + if fmt.get('format') is None: + 
fmt['format'] = '{id} - {res}{note}'.format( + id=fmt['format_id'], + res=self.format_resolution(fmt), + note=format_field(fmt, 'format_note', ' (%s)'), ) if self.params.get('check_formats') is True: @@ -3009,7 +3008,7 @@ def to_screen(*msg): info_dict['requested_downloads'] = downloaded_formats info_dict = self.run_all_pps('after_video', info_dict) if max_downloads_reached: - raise MaxDownloadsReached() + raise MaxDownloadsReached # We update the info dict with the selected best quality format (backwards compatibility) info_dict.update(best_format) @@ -3070,8 +3069,8 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions): else: f = formats[-1] self.report_warning( - 'No subtitle format found matching "%s" for language %s, ' - 'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext'])) + 'No subtitle format found matching "{}" for language {}, ' + 'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext'])) subs[lang] = f return subs @@ -3226,7 +3225,7 @@ def replace_info_dict(new_info): def check_max_downloads(): if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'): - raise MaxDownloadsReached() + raise MaxDownloadsReached if self.params.get('simulate'): info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') @@ -3400,7 +3399,7 @@ def correct_ext(filename, ext=new_ext): for f in info_dict['requested_formats'] if fd != FFmpegFD else []: f['filepath'] = fname = prepend_extension( correct_ext(temp_filename, info_dict['ext']), - 'f%s' % f['format_id'], info_dict['ext']) + 'f{}'.format(f['format_id']), info_dict['ext']) downloaded.append(fname) info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats']) success, real_download = self.dl(temp_filename, info_dict) @@ -3433,7 +3432,7 @@ def correct_ext(filename, ext=new_ext): if temp_filename != '-': fname = prepend_extension( 
correct_ext(temp_filename, new_info['ext']), - 'f%s' % f['format_id'], new_info['ext']) + 'f{}'.format(f['format_id']), new_info['ext']) if not self._ensure_dir_exists(fname): return f['filepath'] = fname @@ -3465,11 +3464,11 @@ def correct_ext(filename, ext=new_ext): info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) except network_exceptions as err: - self.report_error('unable to download video data: %s' % error_to_compat_str(err)) + self.report_error(f'unable to download video data: {err}') return except OSError as err: raise UnavailableVideoError(err) - except (ContentTooShortError, ) as err: + except ContentTooShortError as err: self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})') return @@ -3536,13 +3535,13 @@ def ffmpeg_fixup(cndn, msg, cls): try: replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move)) except PostProcessingError as err: - self.report_error('Postprocessing: %s' % str(err)) + self.report_error(f'Postprocessing: {err}') return try: for ph in self._post_hooks: ph(info_dict['filepath']) except Exception as err: - self.report_error('post hooks: %s' % str(err)) + self.report_error(f'post hooks: {err}') return info_dict['__write_download_archive'] = True @@ -3609,7 +3608,7 @@ def download_with_info_file(self, info_filename): @staticmethod def sanitize_info(info_dict, remove_private_keys=False): - ''' Sanitize the infodict for converting to json ''' + """ Sanitize the infodict for converting to json """ if info_dict is None: return info_dict info_dict.setdefault('epoch', int(time.time())) @@ -3644,7 +3643,7 @@ def filter_fn(obj): @staticmethod def filter_requested_info(info_dict, actually_filter=True): - ''' Alias of sanitize_info for backward compatibility ''' + """ Alias of sanitize_info for backward compatibility """ return YoutubeDL.sanitize_info(info_dict, actually_filter) def _delete_downloaded_files(self, *files_to_delete, info={}, 
msg=None): @@ -3666,7 +3665,7 @@ def actual_post_extract(info_dict): actual_post_extract(video_dict or {}) return - post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) + post_extractor = info_dict.pop('__post_extractor', None) or dict info_dict.update(post_extractor()) actual_post_extract(info_dict or {}) @@ -3771,7 +3770,7 @@ def format_resolution(format, default='unknown'): if format.get('width') and format.get('height'): return '%dx%d' % (format['width'], format['height']) elif format.get('height'): - return '%sp' % format['height'] + return '{}p'.format(format['height']) elif format.get('width'): return '%dx?' % format['width'] return default @@ -3788,7 +3787,7 @@ def _format_note(self, fdict): if fdict.get('language'): if res: res += ' ' - res += '[%s]' % fdict['language'] + res += '[{}]'.format(fdict['language']) if fdict.get('format_note') is not None: if res: res += ' ' @@ -3800,7 +3799,7 @@ def _format_note(self, fdict): if fdict.get('container') is not None: if res: res += ', ' - res += '%s container' % fdict['container'] + res += '{} container'.format(fdict['container']) if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): if res: @@ -3815,7 +3814,7 @@ def _format_note(self, fdict): if fdict.get('fps') is not None: if res: res += ', ' - res += '%sfps' % fdict['fps'] + res += '{}fps'.format(fdict['fps']) if fdict.get('acodec') is not None: if res: res += ', ' @@ -3858,7 +3857,7 @@ def render_formats_table(self, info_dict): format_field(f, 'format_id'), format_field(f, 'ext'), self.format_resolution(f), - self._format_note(f) + self._format_note(f), ] for f in formats if (f.get('preference') or 0) >= -1000] return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) @@ -3964,11 +3963,11 @@ def print_debug_header(self): from .extractor.extractors import _LAZY_LOADER from .extractor.extractors import ( _PLUGIN_CLASSES as plugin_ies, - _PLUGIN_OVERRIDES as plugin_ie_overrides + 
_PLUGIN_OVERRIDES as plugin_ie_overrides, ) def get_encoding(stream): - ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) + ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})')) additional_info = [] if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') @@ -3979,13 +3978,13 @@ def get_encoding(stream): ret = f'{ret} ({",".join(additional_info)})' return ret - encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % ( + encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format( locale.getpreferredencoding(), sys.getfilesystemencoding(), self.get_encoding(), ', '.join( f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_ - if stream is not None and key != 'console') + if stream is not None and key != 'console'), ) logger = self.params.get('logger') @@ -4017,7 +4016,7 @@ def get_encoding(stream): else: write_debug('Lazy loading extractors is disabled') if self.params['compat_opts']: - write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) + write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts']))) if current_git_head(): write_debug(f'Git HEAD: {current_git_head()}') @@ -4026,14 +4025,14 @@ def get_encoding(stream): exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} if ffmpeg_features: - exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features)) + exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features))) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' - write_debug('exe versions: %s' % exe_str) + write_debug(f'exe versions: {exe_str}') from .compat.compat_utils import get_package_info from .dependencies import available_dependencies 
@@ -4045,7 +4044,7 @@ def get_encoding(stream): write_debug(f'Proxy map: {self.proxies}') write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): - display_list = ['%s%s' % ( + display_list = ['{}{}'.format( klass.__name__, '' if klass.__name__ == name else f' as {name}') for name, klass in plugins.items()] if plugin_type == 'Extractor': @@ -4062,14 +4061,13 @@ def get_encoding(stream): # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() - write_debug('Public IP address: %s' % ipaddr) + write_debug(f'Public IP address: {ipaddr}') latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode() if version_tuple(latest_version) > version_tuple(__version__): self.report_warning( - 'You are using an outdated version (newest version: %s)! ' - 'See https://yt-dl.org/update if you need help updating.' % - latest_version) + f'You are using an outdated version (newest version: {latest_version})! 
' + 'See https://yt-dl.org/update if you need help updating.') @functools.cached_property def proxies(self): @@ -4103,7 +4101,7 @@ def _opener(self): return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies) def _get_available_impersonate_targets(self): - # todo(future): make available as public API + # TODO(future): make available as public API return [ (target, rh.RH_NAME) for rh in self._request_director.handlers.values() @@ -4112,7 +4110,7 @@ def _get_available_impersonate_targets(self): ] def _impersonate_target_available(self, target): - # todo(future): make available as public API + # TODO(future): make available as public API return any( rh.is_supported_target(target) for rh in self._request_director.handlers.values() @@ -4238,7 +4236,7 @@ def get_encoding(self): return encoding def _write_info_json(self, label, ie_result, infofn, overwrite=None): - ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error ''' + """ Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """ if overwrite is None: overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): @@ -4261,7 +4259,7 @@ def _write_info_json(self, label, ie_result, infofn, overwrite=None): return None def _write_description(self, label, ie_result, descfn): - ''' Write description and returns True = written, False = skip, None = error ''' + """ Write description and returns True = written, False = skip, None = error """ if not self.params.get('writedescription'): return False elif not descfn: @@ -4285,7 +4283,7 @@ def _write_description(self, label, ie_result, descfn): return True def _write_subtitles(self, info_dict, filename): - ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' + """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error""" ret = [] subtitles = 
info_dict.get('requested_subtitles') if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): @@ -4331,7 +4329,7 @@ def _write_subtitles(self, info_dict, filename): self.dl(sub_filename, sub_copy, subtitle=True) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) - except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err: msg = f'Unable to download video subtitles for {sub_lang!r}: {err}' if self.params.get('ignoreerrors') is not True: # False or 'only_download' if not self.params.get('ignoreerrors'): @@ -4341,7 +4339,7 @@ def _write_subtitles(self, info_dict, filename): return ret def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): - ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error ''' + """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """ write_all = self.params.get('write_all_thumbnails', False) thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): @@ -4368,8 +4366,8 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None existing_thumb = self.existing_file((thumb_filename_final, thumb_filename)) if existing_thumb: - self.to_screen('[info] %s is already present' % ( - thumb_display_id if multiple else f'{label} thumbnail').capitalize()) + self.to_screen('[info] {} is already present'.format(( + thumb_display_id if multiple else f'{label} thumbnail').capitalize())) t['filepath'] = existing_thumb ret.append((existing_thumb, thumb_filename_final)) else: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 3d606bcba254..c18af758917d 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -14,7 +14,7 @@ import re import traceback -from .compat import compat_os_name, 
compat_shlex_quote +from .compat import compat_os_name from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .downloader.external import get_external_downloader from .extractor import list_extractor_classes @@ -58,6 +58,7 @@ read_stdin, render_table, setproctitle, + shell_quote, traverse_obj, variadic, write_string, @@ -115,9 +116,9 @@ def print_extractor_information(opts, urls): ie.description(markdown=False, search_examples=_SEARCHES) for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False) elif opts.ap_list_mso: - out = 'Supported TV Providers:\n%s\n' % render_table( + out = 'Supported TV Providers:\n{}\n'.format(render_table( ['mso', 'mso name'], - [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]) + [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])) else: return False write_string(out, out=sys.stdout) @@ -129,7 +130,7 @@ def _unused_compat_opt(name): if name not in opts.compat_opts: return False opts.compat_opts.discard(name) - opts.compat_opts.update(['*%s' % name]) + opts.compat_opts.update([f'*{name}']) return True def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): @@ -222,7 +223,7 @@ def validate_minmax(min_val, max_val, min_name, max_name=None): validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval') if opts.wait_for_video is not None: - min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) + min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None]) validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video), 'time range to wait for video', opts.wait_for_video) validate_minmax(min_wait, max_wait, 'time range to wait for video') @@ -264,9 +265,9 @@ def parse_retries(name, value): # Retry sleep function def parse_sleep_func(expr): NUMBER_RE = r'\d+(?:\.\d+)?' 
- op, start, limit, step, *_ = tuple(re.fullmatch( + op, start, limit, step, *_ = (*tuple(re.fullmatch( rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?', - expr.strip()).groups()) + (None, None) + expr.strip()).groups()), None, None) if op == 'exp': return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf')) @@ -396,13 +397,13 @@ def parse_chapters(name, value, advanced=False): # MetadataParser def metadataparser_actions(f): if isinstance(f, str): - cmd = '--parse-metadata %s' % compat_shlex_quote(f) + cmd = f'--parse-metadata {shell_quote(f)}' try: actions = [MetadataFromFieldPP.to_action(f)] except Exception as err: raise ValueError(f'{cmd} is invalid; {err}') else: - cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) + cmd = f'--replace-in-metadata {shell_quote(f)}' actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) for action in actions: @@ -413,7 +414,7 @@ def metadataparser_actions(f): yield action if opts.metafromtitle is not None: - opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}') opts.parse_metadata = { k: list(itertools.chain(*map(metadataparser_actions, v))) for k, v in opts.parse_metadata.items() @@ -602,7 +603,7 @@ def get_postprocessors(opts): yield { 'key': 'MetadataParser', 'actions': actions, - 'when': when + 'when': when, } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: @@ -610,19 +611,19 @@ def get_postprocessors(opts): 'key': 'SponsorBlock', 'categories': sponsorblock_query, 'api': opts.sponsorblock_api, - 'when': 'after_filter' + 'when': 'after_filter', } if opts.convertsubtitles: yield { 'key': 'FFmpegSubtitlesConvertor', 'format': opts.convertsubtitles, - 'when': 'before_dl' + 'when': 'before_dl', } if opts.convertthumbnails: yield { 'key': 
'FFmpegThumbnailsConvertor', 'format': opts.convertthumbnails, - 'when': 'before_dl' + 'when': 'before_dl', } if opts.extractaudio: yield { @@ -647,7 +648,7 @@ def get_postprocessors(opts): yield { 'key': 'FFmpegEmbedSubtitle', # already_have_subtitle = True prevents the file from being deleted after embedding - 'already_have_subtitle': opts.writesubtitles and keep_subs + 'already_have_subtitle': opts.writesubtitles and keep_subs, } if not opts.writeautomaticsub and keep_subs: opts.writesubtitles = True @@ -660,7 +661,7 @@ def get_postprocessors(opts): 'remove_sponsor_segments': opts.sponsorblock_remove, 'remove_ranges': opts.remove_ranges, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, - 'force_keyframes': opts.force_keyframes_at_cuts + 'force_keyframes': opts.force_keyframes_at_cuts, } # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and # FFmpegExtractAudioPP as containers before conversion may not support @@ -694,7 +695,7 @@ def get_postprocessors(opts): yield { 'key': 'EmbedThumbnail', # already_have_thumbnail = True prevents the file from being deleted after embedding - 'already_have_thumbnail': opts.writethumbnail + 'already_have_thumbnail': opts.writethumbnail, } if not opts.writethumbnail: opts.writethumbnail = True @@ -741,7 +742,7 @@ def parse_options(argv=None): print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', - 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' + 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl', )) if opts.quiet is None: opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) @@ -1002,7 +1003,7 @@ def _real_main(argv=None): def make_row(target, handler): return [ join_nonempty(target.client.title(), target.version, delim='-') or '-', - join_nonempty((target.os or "").title(), target.os_version, 
delim='-') or '-', + join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-', handler, ] diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index b3a383cd9c3f..abf54a998e0e 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -68,7 +68,7 @@ def pad_block(block, padding_mode): raise NotImplementedError(f'Padding mode {padding_mode} is not implemented') if padding_mode == 'iso7816' and padding_size: - block = block + [0x80] # NB: += mutates list + block = [*block, 0x80] # NB: += mutates list padding_size -= 1 return block + [PADDING_BYTE[padding_mode]] * padding_size @@ -110,9 +110,7 @@ def aes_ecb_decrypt(data, key, iv=None): for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] encrypted_data += aes_decrypt(block, expanded_key) - encrypted_data = encrypted_data[:len(data)] - - return encrypted_data + return encrypted_data[:len(data)] def aes_ctr_decrypt(data, key, iv): @@ -148,9 +146,7 @@ def aes_ctr_encrypt(data, key, iv): cipher_counter_block = aes_encrypt(counter_block, expanded_key) encrypted_data += xor(block, cipher_counter_block) - encrypted_data = encrypted_data[:len(data)] - - return encrypted_data + return encrypted_data[:len(data)] def aes_cbc_decrypt(data, key, iv): @@ -174,9 +170,7 @@ def aes_cbc_decrypt(data, key, iv): decrypted_block = aes_decrypt(block, expanded_key) decrypted_data += xor(decrypted_block, previous_cipher_block) previous_cipher_block = block - decrypted_data = decrypted_data[:len(data)] - - return decrypted_data + return decrypted_data[:len(data)] def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'): @@ -224,7 +218,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) if len(nonce) == 12: - j0 = nonce + [0, 0, 0, 1] + j0 = [*nonce, 0, 0, 0, 1] else: fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * 
len(nonce)).to_bytes(8, 'big')) @@ -242,11 +236,11 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): data + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data - + ((len(data) * 8).to_bytes(8, 'big'))) # length of data + + ((len(data) * 8).to_bytes(8, 'big'))), # length of data ) if tag != aes_ctr_encrypt(s_tag, key, j0): - raise ValueError("Mismatching authentication tag") + raise ValueError('Mismatching authentication tag') return decrypted_data @@ -288,9 +282,7 @@ def aes_decrypt(data, expanded_key): data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) data = shift_rows_inv(data) data = sub_bytes_inv(data) - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) - - return data + return xor(data, expanded_key[:BLOCK_SIZE_BYTES]) def aes_decrypt_text(data, password, key_size_bytes): @@ -318,9 +310,7 @@ def aes_decrypt_text(data, password, key_size_bytes): cipher = data[NONCE_LENGTH_BYTES:] decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) - plaintext = intlist_to_bytes(decrypted_data) - - return plaintext + return intlist_to_bytes(decrypted_data) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) @@ -428,9 +418,7 @@ def key_expansion(data): for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): temp = data[-4:] data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - data = data[:expanded_key_size_bytes] - - return data + return data[:expanded_key_size_bytes] def iter_vector(iv): @@ -511,7 +499,7 @@ def block_product(block_x, block_y): # NIST SP 800-38D, Algorithm 1 if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES: - raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES) + raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes') block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1) block_v = block_y[:] @@ -534,7 +522,7 @@ 
def ghash(subkey, data): # NIST SP 800-38D, Algorithm 2 if len(data) % BLOCK_SIZE_BYTES: - raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES) + raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes') last_y = [0] * BLOCK_SIZE_BYTES for i in range(0, len(data), BLOCK_SIZE_BYTES): diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index 9dd4f2f25b91..71dca82b3584 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -81,10 +81,10 @@ def remove(self): cachedir = self._get_root_dir() if not any((term in cachedir) for term in ('cache', 'tmp')): - raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) + raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir') self._ydl.to_screen( - 'Removing cache dir %s .' % cachedir, skip_eol=True) + f'Removing cache dir {cachedir} .', skip_eol=True) if os.path.exists(cachedir): self._ydl.to_screen('.', skip_eol=True) shutil.rmtree(cachedir) diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index 7ea5d0812036..dfc792eae41e 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -35,7 +35,7 @@ from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 -from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401 +from ..networking.exceptions import HTTPError as compat_HTTPError passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py index 36c983642df1..96689575f6e2 100644 --- a/yt_dlp/compat/functools.py +++ b/yt_dlp/compat/functools.py @@ -7,6 +7,6 @@ del passthrough_module try: - cache # >= 3.9 + _ = cache # >= 3.9 except NameError: cache = lru_cache(maxsize=None) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 
815897d5a5ac..0850ad2600a7 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -146,7 +146,7 @@ def _extract_firefox_cookies(profile, container, logger): identities = json.load(containers).get('identities', []) container_id = next((context.get('userContextId') for context in identities if container in ( context.get('name'), - try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()) + try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()), )), None) if not isinstance(container_id, int): raise ValueError(f'could not find firefox container "{container}" in containers.json') @@ -263,7 +263,7 @@ def _get_chromium_based_browser_settings(browser_name): return { 'browser_dir': browser_dir, 'keyring_name': keyring_name, - 'supports_profiles': browser_name not in browsers_without_profiles + 'supports_profiles': browser_name not in browsers_without_profiles, } @@ -826,7 +826,7 @@ def _choose_linux_keyring(logger): elif desktop_environment == _LinuxDesktopEnvironment.KDE6: linux_keyring = _LinuxKeyring.KWALLET6 elif desktop_environment in ( - _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER + _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER, ): linux_keyring = _LinuxKeyring.BASICTEXT else: @@ -861,7 +861,7 @@ def _get_kwallet_network_wallet(keyring, logger): 'dbus-send', '--session', '--print-reply=literal', f'--dest={service_name}', wallet_path, - 'org.kde.KWallet.networkWallet' + 'org.kde.KWallet.networkWallet', ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: @@ -891,7 +891,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger): 'kwallet-query', '--read-password', f'{browser_keyring_name} Safe Storage', '--folder', f'{browser_keyring_name} Keys', - network_wallet + network_wallet, ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: @@ -931,9 +931,8 @@ def 
_get_gnome_keyring_password(browser_keyring_name, logger): for item in col.get_all_items(): if item.get_label() == f'{browser_keyring_name} Safe Storage': return item.get_secret() - else: - logger.error('failed to read from keyring') - return b'' + logger.error('failed to read from keyring') + return b'' def _get_linux_keyring_password(browser_keyring_name, keyring, logger): @@ -1053,7 +1052,7 @@ class DATA_BLOB(ctypes.Structure): None, # pvReserved: must be NULL None, # pPromptStruct: information about prompts to display 0, # dwFlags - ctypes.byref(blob_out) # pDataOut + ctypes.byref(blob_out), # pDataOut ) if not ret: logger.warning('failed to decrypt with DPAPI', only_once=True) @@ -1129,24 +1128,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') _RESERVED = { - "expires", - "path", - "comment", - "domain", - "max-age", - "secure", - "httponly", - "version", - "samesite", + 'expires', + 'path', + 'comment', + 'domain', + 'max-age', + 'secure', + 'httponly', + 'version', + 'samesite', } - _FLAGS = {"secure", "httponly"} + _FLAGS = {'secure', 'httponly'} # Added 'bad' group to catch the remaining value - _COOKIE_PATTERN = re.compile(r""" + _COOKIE_PATTERN = re.compile(r''' \s* # Optional whitespace at start of cookie (?P<key> # Start of group 'key' - [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter + [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter ) # End of group 'key' ( # Optional group: there may not be a value. \s*=\s* # Equal Sign @@ -1156,7 +1155,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): | # or \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr | # or - [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string + [''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string ) # End of group 'val' | # or (?P<bad>(?:\\;|[^;])*?) 
# 'bad' group fallback for invalid values @@ -1164,7 +1163,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): )? # End of optional value group \s* # Any number of spaces. (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII | re.VERBOSE) + ''', re.ASCII | re.VERBOSE) def load(self, data): # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 @@ -1260,14 +1259,14 @@ def _really_save(self, f, ignore_discard, ignore_expires): # with no name, whereas http.cookiejar regards it as a # cookie with no value. name, value = '', name - f.write('%s\n' % '\t'.join(( + f.write('{}\n'.format('\t'.join(( cookie.domain, self._true_or_false(cookie.domain.startswith('.')), cookie.path, self._true_or_false(cookie.secure), str_or_none(cookie.expires, default=''), - name, value - ))) + name, value, + )))) def save(self, filename=None, ignore_discard=True, ignore_expires=True): """ @@ -1306,10 +1305,10 @@ def prepare_line(line): return line cookie_list = line.split('\t') if len(cookie_list) != self._ENTRY_LEN: - raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) + raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') cookie = self._CookieFileEntry(*cookie_list) if cookie.expires_at and not cookie.expires_at.isdigit(): - raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') return line cf = io.StringIO() diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 65a0d6f23483..2e3ea2fc4e4d 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -404,7 +404,7 @@ def with_fields(*tups, default=''): def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" - self.to_screen('[download] Resuming download at byte %s' % resume_len) + self.to_screen(f'[download] Resuming download at byte {resume_len}') def report_retry(self, err, count, retries, 
frag_index=NO_DEFAULT, fatal=True): """Report retry""" diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 8b0b94e72560..8b45c671a0fb 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -55,7 +55,7 @@ def real_download(self, filename, info_dict): # correct and expected termination thus all postprocessing # should take place retval = 0 - self.to_screen('[%s] Interrupted by user' % self.get_basename()) + self.to_screen(f'[{self.get_basename()}] Interrupted by user') finally: if self._cookies_tempfile: self.try_remove(self._cookies_tempfile) @@ -172,7 +172,7 @@ def _call_downloader(self, tmpfilename, info_dict): decrypt_fragment = self.decrypter(info_dict) dest, _ = self.sanitize_open(tmpfilename, 'wb') for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + fragment_filename = f'{tmpfilename}-Frag{frag_index}' try: src, _ = self.sanitize_open(fragment_filename, 'rb') except OSError as err: @@ -186,7 +186,7 @@ def _call_downloader(self, tmpfilename, info_dict): if not self.params.get('keep_fragments', False): self.try_remove(encodeFilename(fragment_filename)) dest.close() - self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) + self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) return 0 def _call_process(self, cmd, info_dict): @@ -336,11 +336,11 @@ def _make_cmd(self, tmpfilename, info_dict): if 'fragments' in info_dict: cmd += ['--uri-selector=inorder'] - url_list_file = '%s.frag.urls' % tmpfilename + url_list_file = f'{tmpfilename}.frag.urls' url_list = [] for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) - url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename))) + fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}' + url_list.append('{}\n\tout={}'.format(fragment['url'], 
self._aria2c_filename(fragment_filename))) stream, _ = self.sanitize_open(url_list_file, 'wb') stream.write('\n'.join(url_list).encode()) stream.close() @@ -357,7 +357,7 @@ def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): 'id': sanitycheck, 'method': method, 'params': [f'token:{rpc_secret}', *params], - }).encode('utf-8') + }).encode() request = Request( f'http://localhost:{rpc_port}/jsonrpc', data=d, headers={ @@ -416,7 +416,7 @@ def get_stat(key, *obj, average=False): 'total_bytes_estimate': total, 'eta': (total - downloaded) / (speed or 1), 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, - 'elapsed': time.time() - started + 'elapsed': time.time() - started, }) self._hook_progress(status, info_dict) @@ -509,12 +509,12 @@ def _call_downloader(self, tmpfilename, info_dict): proxy = self.params.get('proxy') if proxy: if not re.match(r'^[\da-zA-Z]+://', proxy): - proxy = 'http://%s' % proxy + proxy = f'http://{proxy}' if proxy.startswith('socks'): self.report_warning( - '%s does not support SOCKS proxies. Downloading is likely to fail. ' - 'Consider adding --hls-prefer-native to your command.' % self.get_basename()) + f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. 
' + 'Consider adding --hls-prefer-native to your command.') # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) @@ -575,7 +575,7 @@ def _call_downloader(self, tmpfilename, info_dict): if end_time: args += ['-t', str(end_time - start_time)] - args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']] + args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 28cbba0169d8..22d0ebd26555 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -67,12 +67,12 @@ def read_asrt(self): self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount - for i in range(quality_entry_count): + for _ in range(quality_entry_count): self.read_string() segment_run_count = self.read_unsigned_int() segments = [] - for i in range(segment_run_count): + for _ in range(segment_run_count): first_segment = self.read_unsigned_int() fragments_per_segment = self.read_unsigned_int() segments.append((first_segment, fragments_per_segment)) @@ -91,12 +91,12 @@ def read_afrt(self): quality_entry_count = self.read_unsigned_char() # QualitySegmentUrlModifiers - for i in range(quality_entry_count): + for _ in range(quality_entry_count): self.read_string() fragments_count = self.read_unsigned_int() fragments = [] - for i in range(fragments_count): + for _ in range(fragments_count): first = self.read_unsigned_int() first_ts = self.read_unsigned_long_long() duration = self.read_unsigned_int() @@ -135,11 +135,11 @@ def read_abst(self): self.read_string() # MovieIdentifier server_count = self.read_unsigned_char() # ServerEntryTable - for i in range(server_count): + for _ in range(server_count): self.read_string() quality_count = self.read_unsigned_char() # 
QualityEntryTable - for i in range(quality_count): + for _ in range(quality_count): self.read_string() # DrmData self.read_string() @@ -148,14 +148,14 @@ def read_abst(self): segments_count = self.read_unsigned_char() segments = [] - for i in range(segments_count): + for _ in range(segments_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'asrt' segment = FlvReader(box_data).read_asrt() segments.append(segment) fragments_run_count = self.read_unsigned_char() fragments = [] - for i in range(fragments_run_count): + for _ in range(fragments_run_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'afrt' fragments.append(FlvReader(box_data).read_afrt()) @@ -309,7 +309,7 @@ def _parse_bootstrap_node(self, node, base_url): def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') - self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url @@ -326,8 +326,8 @@ def real_download(self, filename, info_dict): formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] else: - rate, media = list(filter( - lambda f: int(f[0]) == requested_bitrate, formats))[0] + rate, media = next(filter( + lambda f: int(f[0]) == requested_bitrate, formats)) # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. man_base_url = get_base_url(doc) or man_url diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index b4f003d37f54..0d00196e2eed 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -199,7 +199,7 @@ def _prepare_frag_download(self, ctx): '.ytdl file is corrupt' if is_corrupt else 'Inconsistent state of incomplete fragment download') self.report_warning( - '%s. Restarting from the beginning ...' % message) + f'{message}. 
Restarting from the beginning ...') ctx['fragment_index'] = resume_len = 0 if 'ytdl_corrupt' in ctx: del ctx['ytdl_corrupt'] @@ -366,10 +366,10 @@ def decrypt_fragment(fragment, frag_content): return decrypt_fragment def download_and_append_fragments_multiple(self, *args, **kwargs): - ''' + """ @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... all args must be either tuple or list - ''' + """ interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: @@ -424,7 +424,7 @@ def interrupt_trigger_iter(fg): finally: tpe.shutdown(wait=True) if not interrupt_trigger[0] and not is_live: - raise KeyboardInterrupt() + raise KeyboardInterrupt # we expect the user wants to stop and DO WANT the preceding postprocessors to run; # so returning a intermediate result here instead of KeyboardInterrupt on live return result diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 4ac5d99dc0fd..9cb4f014c057 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -72,7 +72,7 @@ def check_results(): def real_download(self, filename, info_dict): man_url = info_dict['url'] - self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url @@ -228,7 +228,7 @@ def is_ad_fragment_end(s): 'url': frag_url, 'decrypt_info': decrypt_info, 'byte_range': byte_range, - 'media_sequence': media_sequence + 'media_sequence': media_sequence, }) media_sequence += 1 @@ -350,9 +350,8 @@ def pack_fragment(frag_content, frag_index): # XXX: this should probably be silent as well # or verify that all segments contain the same data self.report_warning(bug_reports_message( - 'Discarding a %s block found in the middle of the stream; ' - 'if the subtitles display incorrectly,' - % (type(block).__name__))) + f'Discarding a {type(block).__name__} block found in the middle of the stream; ' + 'if the 
subtitles display incorrectly,')) continue block.write_into(output) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 693828b6e5e7..c0165790d11d 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -176,7 +176,7 @@ def establish_connection(): 'downloaded_bytes': ctx.resume_len, 'total_bytes': ctx.resume_len, }, info_dict) - raise SucceedDownload() + raise SucceedDownload else: # The length does not match, we start the download over self.report_unable_to_resume() @@ -194,7 +194,7 @@ def establish_connection(): def close_stream(): if ctx.stream is not None: - if not ctx.tmpfilename == '-': + if ctx.tmpfilename != '-': ctx.stream.close() ctx.stream = None @@ -268,20 +268,20 @@ def retry(e): ctx.filename = self.undo_temp_name(ctx.tmpfilename) self.report_destination(ctx.filename) except OSError as err: - self.report_error('unable to open for writing: %s' % str(err)) + self.report_error(f'unable to open for writing: {err}') return False if self.params.get('xattr_set_filesize', False) and data_len is not None: try: write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) except (XAttrUnavailableError, XAttrMetadataError) as err: - self.report_error('unable to set filesize xattr: %s' % str(err)) + self.report_error(f'unable to set filesize xattr: {err}') try: ctx.stream.write(data_block) except OSError as err: self.to_stderr('\n') - self.report_error('unable to write data: %s' % str(err)) + self.report_error(f'unable to write data: {err}') return False # Apply rate limit @@ -327,7 +327,7 @@ def retry(e): elif now - ctx.throttle_start > 3: if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() - raise ThrottledDownload() + raise ThrottledDownload elif speed: ctx.throttle_start = None @@ -338,7 +338,7 @@ def retry(e): if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: ctx.resume_len = byte_counter - raise NextFragment() + raise NextFragment if 
ctx.tmpfilename != '-': ctx.stream.close() diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index dd688f586d57..62c3a3b7fd31 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -251,7 +251,7 @@ def real_download(self, filename, info_dict): skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) frag_index = 0 - for i, segment in enumerate(segments): + for segment in segments: frag_index += 1 if frag_index <= ctx['fragment_index']: continue diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index d977dcec31fb..3d4f2d7634a8 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -10,7 +10,7 @@ class MhtmlFD(FragmentFD): - _STYLESHEET = """\ + _STYLESHEET = '''\ html, body { margin: 0; padding: 0; @@ -45,7 +45,7 @@ class MhtmlFD(FragmentFD): max-width: 100%; max-height: calc(100vh - 5em); } -""" +''' _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) @@ -57,24 +57,19 @@ def _escape_mime(s): )).decode('us-ascii') + '?=' def _gen_cid(self, i, fragment, frag_boundary): - return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary) + return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid' def _gen_stub(self, *, fragments, frag_boundary, title): output = io.StringIO() - output.write(( + output.write( '<!DOCTYPE html>' '<html>' '<head>' - '' '<meta name="generator" content="yt-dlp {version}">' - '' '<title>{title}' - '' '' - '' - ).format( - version=escapeHTML(YT_DLP_VERSION), - styles=self._STYLESHEET, - title=escapeHTML(title) - )) + f'' + f'{escapeHTML(title)}' + f'' + '') t0 = 0 for i, frag in enumerate(fragments): @@ -87,15 +82,12 @@ def _gen_stub(self, *, fragments, frag_boundary, title): num=i + 1, t0=srt_subtitles_timecode(t0), t1=srt_subtitles_timecode(t1), - duration=formatSeconds(frag['duration'], msec=True) + duration=formatSeconds(frag['duration'], msec=True), )) except 
(KeyError, ValueError, TypeError): t1 = None - output.write(( - '
Slide #{num}
' - ).format(num=i + 1)) - output.write(''.format( - cid=self._gen_cid(i, frag, frag_boundary))) + output.write(f'
Slide #{i + 1}
') + output.write(f'') output.write('') t0 = t1 @@ -126,31 +118,24 @@ def real_download(self, filename, info_dict): stub = self._gen_stub( fragments=fragments, frag_boundary=frag_boundary, - title=title + title=title, ) ctx['dest_stream'].write(( 'MIME-Version: 1.0\r\n' 'From: \r\n' 'To: \r\n' - 'Subject: {title}\r\n' + f'Subject: {self._escape_mime(title)}\r\n' 'Content-type: multipart/related; ' - '' 'boundary="{boundary}"; ' - '' 'type="text/html"\r\n' - 'X.yt-dlp.Origin: {origin}\r\n' + f'boundary="{frag_boundary}"; ' + 'type="text/html"\r\n' + f'X.yt-dlp.Origin: {origin}\r\n' '\r\n' - '--{boundary}\r\n' + f'--{frag_boundary}\r\n' 'Content-Type: text/html; charset=utf-8\r\n' - 'Content-Length: {length}\r\n' + f'Content-Length: {len(stub)}\r\n' '\r\n' - '{stub}\r\n' - ).format( - origin=origin, - boundary=frag_boundary, - length=len(stub), - title=self._escape_mime(title), - stub=stub - ).encode()) + f'{stub}\r\n').encode()) extra_state['header_written'] = True for i, fragment in enumerate(fragments): diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index fef8bff73ad9..462c6e2d63ec 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -15,7 +15,7 @@ class NiconicoDmcFD(FileDownloader): def real_download(self, filename, info_dict): from ..extractor.niconico import NiconicoIE - self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading from DMC') ie = NiconicoIE(self.ydl) info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) @@ -34,7 +34,7 @@ def heartbeat(): try: self.ydl.urlopen(request).read() except Exception: - self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Heartbeat failed') with heartbeat_lock: if not download_complete: @@ -85,14 +85,14 @@ def communicate_ws(reconnect): 'quality': live_quality, 'protocol': 'hls+fmp4', 'latency': live_latency, - 'chasePlay': False + 'chasePlay': False, }, 'room': { 
'protocol': 'webSocket', - 'commentable': True + 'commentable': True, }, 'reconnect': True, - } + }, })) else: ws = ws_extractor @@ -118,7 +118,7 @@ def communicate_ws(reconnect): elif self.ydl.params.get('verbose', False): if len(recv) > 100: recv = recv[:100] + '...' - self.to_screen('[debug] Server said: %s' % recv) + self.to_screen(f'[debug] Server said: {recv}') def ws_main(): reconnect = False @@ -128,7 +128,7 @@ def ws_main(): if ret is True: return except BaseException as e: - self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) + self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e))) time.sleep(10) continue finally: diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index 0e09525991ff..d7ffb3b34df5 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -180,9 +180,9 @@ def run_rtmpdump(args): while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: prevsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) + self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') time.sleep(5.0) # This seems to be needed - args = basic_args + ['--resume'] + args = [*basic_args, '--resume'] if retval == RD_FAILED: args += ['--skip', '1'] args = [encodeArgument(a) for a in args] @@ -197,7 +197,7 @@ def run_rtmpdump(args): break if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) + self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index c7a86374aa2d..961938d4491f 100644 --- 
a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD): def real_download(self, filename, info_dict): video_id = info_dict['video_id'] - self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading live chat') if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': self.report_warning('Live chat download runs until the livestream ends. ' 'If you wish to download the video simultaneously, run a separate yt-dlp instance') diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 2c0d296fd2d7..7518ba6f0dbf 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -4,7 +4,6 @@ import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, dict_get, @@ -67,7 +66,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'title': 'WWI Centenary', 'description': 'md5:c2379ec0ca84072e86b446e536954546', - } + }, }, { 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', 'info_dict': { @@ -75,7 +74,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', - } + }, }, { 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', 'info_dict': { @@ -86,7 +85,7 @@ class ABCIE(InfoExtractor): 'upload_date': '20200813', 'uploader': 'Behind the News', 'uploader_id': 'behindthenews', - } + }, }, { 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', 'info_dict': { @@ -95,7 +94,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to 
end the mutiny.', 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', - } + }, }] def _real_extract(self, url): @@ -126,7 +125,7 @@ def _real_extract(self, url): if mobj is None: expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?(.+?)', webpage, 'expired', None) if expired: - raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True) raise ExtractorError('Unable to extract video urls') urls_info = self._parse_json( @@ -164,7 +163,7 @@ def _real_extract(self, url): 'height': height, 'tbr': bitrate, 'filesize': int_or_none(url_info.get('filesize')), - 'format_id': format_id + 'format_id': format_id, }) return { @@ -288,13 +287,12 @@ def _real_extract(self, url): stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) house_number = video_params.get('episodeHouseNumber') or video_id - path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( - int(time.time()), house_number) + path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet' sig = hmac.new( b'android.content.res.Resources', - path.encode('utf-8'), hashlib.sha256).hexdigest() + path.encode(), hashlib.sha256).hexdigest() token = self._download_webpage( - 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) + f'http://iview.abc.net.au{path}&sig={sig}', video_id) def tokenize_url(url, token): return update_url_query(url, { @@ -303,7 +301,7 @@ def tokenize_url(url, token): for sd in ('1080', '720', 'sd', 'sd-low'): sd_url = try_get( - stream, lambda x: x['streams']['hls'][sd], compat_str) + stream, lambda x: x['streams']['hls'][sd], str) if not sd_url: continue formats = self._extract_m3u8_formats( @@ -358,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): 'description': 
'md5:93119346c24a7c322d446d8eece430ff', 'series': 'Upper Middle Bogan', 'season': 'Series 1', - 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', }, 'playlist_count': 8, }, { @@ -386,7 +384,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): 'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.', 'series': '7.30 Mark Humphries Satire', 'season': 'Episodes', - 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', }, 'playlist_count': 15, }] @@ -398,7 +396,7 @@ def _real_extract(self, url): r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', webpage, 'initial state') video_data = self._parse_json( - unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id) + unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id) video_data = video_data['route']['pageData']['_embedded'] highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl']) diff --git a/yt_dlp/extractor/abcnews.py b/yt_dlp/extractor/abcnews.py index a57295b13110..7215500b917a 100644 --- a/yt_dlp/extractor/abcnews.py +++ b/yt_dlp/extractor/abcnews.py @@ -58,7 +58,7 @@ def _real_extract(self, url): display_id = mobj.group('display_id') video_id = mobj.group('id') info_dict = self._extract_feed_info( - 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) + f'http://abcnews.go.com/video/itemfeed?id={video_id}') info_dict.update({ 'id': video_id, 'display_id': display_id, diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py index 6dca19de4180..ea5882b2606d 100644 --- a/yt_dlp/extractor/abcotvs.py +++ b/yt_dlp/extractor/abcotvs.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( dict_get, int_or_none, @@ -57,11 +56,11 @@ def _real_extract(self, url): data = 
self._download_json( 'https://api.abcotvs.com/v2/content', display_id, query={ 'id': video_id, - 'key': 'otv.web.%s.story' % station, + 'key': f'otv.web.{station}.story', 'station': station, })['data'] video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data - video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id)) + video_id = str(dict_get(video, ('id', 'publishedKey'), video_id)) title = video.get('title') or video['linkText'] formats = [] diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index b8c79b912a93..293a6c40e0ca 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -66,8 +66,8 @@ def _get_videokey_from_ticket(self, ticket): query={'t': media_token}, data=json.dumps({ 'kv': 'a', - 'lt': ticket - }).encode('utf-8'), + 'lt': ticket, + }).encode(), headers={ 'Content-Type': 'application/json', }) @@ -77,7 +77,7 @@ def _get_videokey_from_ticket(self, ticket): h = hmac.new( binascii.unhexlify(self.HKEY), - (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), + (license_response['cid'] + self.ie._DEVICE_ID).encode(), digestmod=hashlib.sha256) enckey = bytes_to_intlist(h.digest()) @@ -103,11 +103,11 @@ class AbemaTVBaseIE(InfoExtractor): @classmethod def _generate_aks(cls, deviceid): - deviceid = deviceid.encode('utf-8') + deviceid = deviceid.encode() # add 1 hour and then drop minute and secs ts_1hour = int((time_seconds() // 3600 + 1) * 3600) time_struct = time.gmtime(ts_1hour) - ts_1hour_str = str(ts_1hour).encode('utf-8') + ts_1hour_str = str(ts_1hour).encode() tmp = None @@ -119,7 +119,7 @@ def mix_once(nonce): def mix_tmp(count): nonlocal tmp - for i in range(count): + for _ in range(count): mix_once(tmp) def mix_twist(nonce): @@ -160,7 +160,7 @@ def _get_device_token(self): data=json.dumps({ 'deviceId': self._DEVICE_ID, 'applicationKeySecret': aks, - }).encode('utf-8'), + }).encode(), headers={ 'Content-Type': 'application/json', }) @@ -180,7 +180,7 @@ def 
_get_media_token(self, invalidate=False, to_show=True): 'osLang': 'ja_JP', 'osTimezone': 'Asia/Tokyo', 'appId': 'tv.abema', - 'appVersion': '3.27.1' + 'appVersion': '3.27.1', }, headers={ 'Authorization': f'bearer {self._get_device_token()}', })['token'] @@ -202,8 +202,8 @@ def _perform_login(self, username, password): f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in', data=json.dumps({ method: username, - 'password': password - }).encode('utf-8'), headers={ + 'password': password, + }).encode(), headers={ 'Authorization': f'bearer {self._get_device_token()}', 'Origin': 'https://abema.tv', 'Referer': 'https://abema.tv/', @@ -344,7 +344,7 @@ def _real_extract(self, url): description = self._html_search_regex( (r'(.+?)

(.+?)(.+?)(.+?)' % (tag, tag), xml_str, tag) + f'<{tag}>(.+?)', xml_str, tag) def is_expired(token, date_ele): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) @@ -1394,7 +1394,7 @@ def post_form(form_page_res, note, data={}): form_page, urlh = form_page_res post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') if not re.match(r'https?://', post_url): - post_url = compat_urlparse.urljoin(urlh.url, post_url) + post_url = urllib.parse.urljoin(urlh.url, post_url) form_data = self._hidden_inputs(form_page) form_data.update(data) return self._download_webpage_handle( @@ -1414,13 +1414,13 @@ def extract_redirect_url(html, url=None, fatal=False): REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' redirect_url = self._search_regex( r'(?i)]+src="(%s)' % HistoryPlayerIE._VALID_URL, + rf']+src="({HistoryPlayerIE._VALID_URL})', webpage, 'player URL') return self.url_result(player_url, HistoryPlayerIE.ie_key()) diff --git a/yt_dlp/extractor/aeonco.py b/yt_dlp/extractor/aeonco.py index 390eae32bf19..22d0266bae34 100644 --- a/yt_dlp/extractor/aeonco.py +++ b/yt_dlp/extractor/aeonco.py @@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor): 'uploader': 'Semiconductor', 'uploader_id': 'semiconductor', 'uploader_url': 'https://vimeo.com/semiconductor', - 'duration': 348 - } + 'duration': 348, + }, }, { 'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it', 'md5': '03582d795382e49f2fd0b427b55de409', @@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor): 'uploader': 'Aeon Video', 'uploader_id': 'aeonvideo', 'uploader_url': 'https://vimeo.com/aeonvideo', - 'duration': 1344 - } + 'duration': 1344, + }, }, { 'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out', 'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b', diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 
3e5738f6ab6c..bcfb02cb95ab 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -55,7 +55,7 @@ def _perform_login(self, username, password): if result != 1: error = _ERRORS.get(result, 'You have failed to log in.') raise ExtractorError( - 'Unable to login: %s said: %s' % (self.IE_NAME, error), + f'Unable to login: {self.IE_NAME} said: {error}', expected=True) @@ -227,7 +227,7 @@ def _real_extract(self, url): **traverse_obj(file_element, { 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('file_start', {unified_timestamp}), - }) + }), }) if traverse_obj(data, ('adult_status', {str})) == 'notLogin': diff --git a/yt_dlp/extractor/agora.py b/yt_dlp/extractor/agora.py index abb2d3ff27e3..983558425412 100644 --- a/yt_dlp/extractor/agora.py +++ b/yt_dlp/extractor/agora.py @@ -168,7 +168,7 @@ def _real_extract(self, url): for ext in ('aac', 'mp3'): url_data = self._download_json( f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}', - media_id, 'Downloading podcast %s URL' % ext) + media_id, f'Downloading podcast {ext} URL') # prevents inserting the mp3 (default) multiple times if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']: formats.append({ @@ -206,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor): } @staticmethod - def _create_url(id): - return f'https://audycje.tokfm.pl/audycja/{id}' + def _create_url(video_id): + return f'https://audycje.tokfm.pl/audycja/{video_id}' def _real_extract(self, url): audition_id = self._match_id(url) diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py index 6cc63cd7f989..cee660dfcf41 100644 --- a/yt_dlp/extractor/airtv.py +++ b/yt_dlp/extractor/airtv.py @@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor): 'view_count': int, 'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', 'timestamp': 1664792603, - } + }, }, 
{ # with youtube_id 'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', @@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor): 'channel': 'Newsflare', 'duration': 37, 'upload_date': '20180511', - } + }, }] def _get_formats_and_subtitle(self, json_data, video_id): diff --git a/yt_dlp/extractor/aitube.py b/yt_dlp/extractor/aitube.py index 89a64503fb32..5179b72e9fbc 100644 --- a/yt_dlp/extractor/aitube.py +++ b/yt_dlp/extractor/aitube.py @@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor): 'timestamp': 1667370519, 'title': 'Ангел хранитель 1 серия', 'channel_follower_count': int, - } + }, }, { # embed url 'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c', diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py index 2e83f2eb6e55..e8f8618fa917 100644 --- a/yt_dlp/extractor/aliexpress.py +++ b/yt_dlp/extractor/aliexpress.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, try_get, @@ -44,7 +43,7 @@ def _real_extract(self, url): 'title': title, 'thumbnail': data.get('coverUrl'), 'uploader': try_get( - data, lambda x: x['followBar']['name'], compat_str), + data, lambda x: x['followBar']['name'], str), 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), 'formats': formats, } diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index 124bab0d9278..9715b497e850 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor): 'timestamp': 1636219149, 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', 'upload_date': '20211106', - } + }, }, { 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', 'info_dict': { @@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor): BRIGHTCOVE_URL_RE = 
r'https?://players.brightcove.net/(?P\d+)/(?P[a-zA-Z0-9]+)_(?P[^/]+)/index.html\?videoId=(?P\d+)' def _real_extract(self, url): - base, post_type, id = self._match_valid_url(url).groups() + base, post_type, display_id = self._match_valid_url(url).groups() wp = { 'balkans.aljazeera.net': 'ajb', 'chinese.aljazeera.net': 'chinese', @@ -47,11 +47,11 @@ def _real_extract(self, url): 'news': 'news', }[post_type.split('/')[0]] video = self._download_json( - f'https://{base}/graphql', id, query={ + f'https://{base}/graphql', display_id, query={ 'wp-site': wp, 'operationName': 'ArchipelagoSingleArticleQuery', 'variables': json.dumps({ - 'name': id, + 'name': display_id, 'postType': post_type, }), }, headers={ @@ -64,7 +64,7 @@ def _real_extract(self, url): embed = 'default' if video_id is None: - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, display_id) account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', group=(1, 2, 3, 4), default=(None, None, None, None)) @@ -73,11 +73,11 @@ def _real_extract(self, url): return { '_type': 'url_transparent', 'url': url, - 'ie_key': 'Generic' + 'ie_key': 'Generic', } return { '_type': 'url_transparent', 'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', - 'ie_key': 'BrightcoveNew' + 'ie_key': 'BrightcoveNew', } diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py index 2d342cf03909..e0859d451441 100644 --- a/yt_dlp/extractor/allocine.py +++ b/yt_dlp/extractor/allocine.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, qualities, @@ -95,11 +94,11 @@ def _real_extract(self, url): duration = int_or_none(video.get('duration')) view_count = int_or_none(video.get('view_count')) timestamp = unified_timestamp(try_get( - video, lambda x: x['added_at']['date'], compat_str)) + video, lambda x: x['added_at']['date'], str)) else: 
video_id = display_id media_data = self._download_json( - 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id) title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné')) for key, value in media_data['video'].items(): if not key.endswith('Path'): diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py index 49df4bf3aa14..5ea1c30e3d74 100644 --- a/yt_dlp/extractor/allstar.py +++ b/yt_dlp/extractor/allstar.py @@ -33,27 +33,27 @@ video: getClip(clipIdentifier: $id) { %s %s } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 'montage': '''query ($id: String!) { video: getMontage(clipIdentifier: $id) { %s } - }''' % _FIELDS, + }''' % _FIELDS, # noqa: UP031 'Clips': '''query ($page: Int!, $user: String!, $game: Int) { videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) { data { %s %s } } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 'Montages': '''query ($page: Int!, $user: String!) { videos: montages(search: createdDate, page: $page, user: $user) { data { %s } } - }''' % _FIELDS, + }''' % _FIELDS, # noqa: UP031 'Mobile Clips': '''query ($page: Int!, $user: String!) 
{ videos: clips(search: createdDate, page: $page, user: $user, mobile: true) { data { %s %s } } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 } @@ -121,7 +121,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230425', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/clip?clip=8LJLY4JKB', 'info_dict': { @@ -139,7 +139,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230702', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c', 'info_dict': { @@ -155,7 +155,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230418', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/montage?montage=RILJMH6QOS', 'info_dict': { @@ -171,7 +171,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230703', 'view_count': int, - } + }, }] def _real_extract(self, url): @@ -191,28 +191,28 @@ class AllstarProfileIE(AllstarBaseIE): 'id': '62b8bdfc9021052f7905882d-clips', 'title': 'cherokee - Clips', }, - 'playlist_mincount': 15 + 'playlist_mincount': 15, }, { 'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-clips-730', 'title': 'cherokee - Clips - 730', }, - 'playlist_mincount': 15 + 'playlist_mincount': 15, }, { 'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-montages', 'title': 'cherokee - Montages', }, - 'playlist_mincount': 4 + 'playlist_mincount': 4, }, { 'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-mobile', 'title': 'cherokee - Mobile Clips', }, - 'playlist_mincount': 1 + 'playlist_mincount': 1, }] _PAGE_SIZE = 10 
diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index f927965de9ee..7b74d5524d65 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor): 'tbr': 1145, 'categories': list, 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py index ea3332e3d5cc..c315e4f21704 100644 --- a/yt_dlp/extractor/alsace20tv.py +++ b/yt_dlp/extractor/alsace20tv.py @@ -12,7 +12,7 @@ class Alsace20TVBaseIE(InfoExtractor): def _extract_video(self, video_id, url=None): info = self._download_json( - 'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ), + f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html', video_id) or {} title = info.get('titre') @@ -24,9 +24,9 @@ def _extract_video(self, video_id, url=None): else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' - thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage)) + thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage)) upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None) - upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None + upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/altcensored.py b/yt_dlp/extractor/altcensored.py index 6878918a0042..bfbf6b6afdee 100644 --- a/yt_dlp/extractor/altcensored.py +++ b/yt_dlp/extractor/altcensored.py @@ -34,7 +34,7 @@ class AltCensoredIE(InfoExtractor): 'thumbnail': 
'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg', 'view_count': int, 'categories': ['News & Politics'], - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index cb2b9891e9b7..ce03a4265bec 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( ExtractorError, clean_html, @@ -21,7 +21,7 @@ class AluraIE(InfoExtractor): 'info_dict': { 'id': '60095', 'ext': 'mp4', - 'title': 'Referências, ref-set e alter' + 'title': 'Referências, ref-set e alter', }, 'skip': 'Requires alura account credentials'}, { @@ -30,7 +30,7 @@ class AluraIE(InfoExtractor): 'only_matching': True}, { 'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219', - 'only_matching': True} + 'only_matching': True}, ] def _real_extract(self, url): @@ -62,7 +62,7 @@ def _real_extract(self, url): return { 'id': video_id, 'title': video_title, - "formats": formats + 'formats': formats, } def _perform_login(self, username, password): @@ -91,7 +91,7 @@ def is_logged(webpage): 'post url', default=self._LOGIN_URL, group='url') if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( post_url, None, 'Logging in', @@ -103,7 +103,7 @@ def is_logged(webpage): r'(?s)]+class="alert-message[^"]*">(.+?)

', response, 'error message', default=None) if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') @@ -119,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE @classmethod def suitable(cls, url): - return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url) + return False if AluraIE.suitable(url) else super().suitable(url) def _real_extract(self, url): @@ -157,7 +157,7 @@ def _real_extract(self, url): 'url': video_url, 'id_key': self.ie_key(), 'chapter': chapter, - 'chapter_number': chapter_number + 'chapter_number': chapter_number, } entries.append(entry) return self.playlist_result(entries, course_path, course_title) diff --git a/yt_dlp/extractor/amadeustv.py b/yt_dlp/extractor/amadeustv.py index 2f5ca9137a27..f4ea04efd8ab 100644 --- a/yt_dlp/extractor/amadeustv.py +++ b/yt_dlp/extractor/amadeustv.py @@ -24,7 +24,7 @@ class AmadeusTVIE(InfoExtractor): 'display_id': '65091a87ff85af59d9fc54c3', 'view_count': int, 'description': 'md5:a0357b9c215489e2067cbae0b777bb95', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 509b21a5316c..ed0f0cd357d9 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor): 'uploader': 'PBS NewsHour', 'uploader_id': 'PBSNewsHour', 'timestamp': 1549639570, - } + }, }, { # Vimeo 'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', @@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor): 'timestamp': 1294763658, 'upload_date': '20110111', 'uploader': 'Sam Morrill', - 'uploader_id': 'sammorrill' - } + 'uploader_id': 'sammorrill', + }, }, { # Direct Link 'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', @@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor): 'subtitles': dict, 'upload_date': 
'20091007', 'timestamp': 1254942511, - } + }, }] def _real_extract(self, url): video_id = self._match_id(url) meta = self._download_json( - 'https://amara.org/api/videos/%s/' % video_id, + f'https://amara.org/api/videos/{video_id}/', video_id, query={'format': 'json'}) title = meta['title'] video_url = meta['all_urls'][0] diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index a03f983e0e93..d1b91665c22e 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) + playlist_id = self._match_id(url) for retry in self.RetryManager(): - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, playlist_id) try: data_json = self._search_json( - r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, + r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id, transform_source=js_to_json) except ExtractorError as e: retry.error = e @@ -81,7 +81,7 @@ def _real_extract(self, url): 'height': int_or_none(video.get('videoHeight')), 'width': int_or_none(video.get('videoWidth')), } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] - return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) + return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title')) class AmazonReviewsIE(InfoExtractor): diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 2c71c5ef569c..0590a344aa5a 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -25,7 +25,7 @@ def _call_api(self, asin, data=None, note=None): asin, note=note, headers={ 'Content-Type': 'application/json', 'currentpageurl': '/', - 'currentplatform': 'dWeb' + 'currentplatform': 'dWeb', }, data=json.dumps(data).encode() if data else None, query=None if data else { 
'deviceType': 'A1WMMUXPCUJL4N', diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index 10bd021c55ac..15a86e24526f 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ -64,8 +64,8 @@ def _real_extract(self, url): site, display_id = self._match_valid_url(url).groups() requestor_id = self._REQUESTOR_ID_MAP[site] page_data = self._download_json( - 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' - % (requestor_id.lower(), display_id), display_id)['data'] + f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}', + display_id)['data'] properties = page_data.get('properties') or {} query = { 'mbr': 'true', @@ -76,15 +76,15 @@ def _real_extract(self, url): try: for v in page_data['children']: if v.get('type') == 'video-player': - releasePid = v['properties']['currentVideo']['meta']['releasePid'] - tp_path = 'M_UwQC/' + releasePid + release_pid = v['properties']['currentVideo']['meta']['releasePid'] + tp_path = 'M_UwQC/' + release_pid media_url = 'https://link.theplatform.com/s/' + tp_path video_player_count += 1 except KeyError: pass if video_player_count > 1: self.report_warning( - 'The JSON data has %d video players. Only one will be extracted' % video_player_count) + f'The JSON data has {video_player_count} video players. Only one will be extracted') # Fall back to videoPid if releasePid not found. # TODO: Fall back to videoPid if releasePid manifest uses DRM. 
@@ -131,7 +131,7 @@ def _real_extract(self, url): }) ns_keys = theplatform_metadata.get('$xmlns', {}).keys() if ns_keys: - ns = list(ns_keys)[0] + ns = next(iter(ns_keys)) episode = theplatform_metadata.get(ns + '$episodeTitle') or None episode_number = int_or_none( theplatform_metadata.get(ns + '$episode')) diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py index e889458a2848..a6337e4825c6 100644 --- a/yt_dlp/extractor/americastestkitchen.py +++ b/yt_dlp/extractor/americastestkitchen.py @@ -87,13 +87,13 @@ def _real_extract(self, url): resource_type = 'episodes' resource = self._download_json( - 'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id) + f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id) video = resource['video'] if is_episode else resource episode = resource if is_episode else resource.get('episode') or {} return { '_type': 'url_transparent', - 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'], + 'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']), 'ie_key': 'Zype', 'description': clean_html(video.get('description')), 'timestamp': unified_timestamp(video.get('publishDate')), @@ -174,22 +174,22 @@ def _real_extract(self, url): ] if season_number: - playlist_id = 'season_%d' % season_number - playlist_title = 'Season %d' % season_number + playlist_id = f'season_{season_number}' + playlist_title = f'Season {season_number}' facet_filters.append('search_season_list:' + playlist_title) else: playlist_id = show playlist_title = title season_search = self._download_json( - 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug, + f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production', playlist_id, 
headers={ 'Origin': 'https://www.americastestkitchen.com', 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', 'X-Algolia-Application-Id': 'Y1FNZXUI30', }, query={ 'facetFilters': json.dumps(facet_filters), - 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug, + 'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season', 'attributesToHighlight': '', 'hitsPerPage': 1000, }) @@ -207,7 +207,7 @@ def entries(): 'description': episode.get('description'), 'timestamp': unified_timestamp(episode.get('search_document_date')), 'season_number': season_number, - 'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)), + 'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')), 'ie_key': AmericasTestKitchenIE.ie_key(), } diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 6b2bf2db2c56..adf4733749e3 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -19,12 +19,12 @@ def _extract_feed_info(self, url): 'Unable to download Akamai AMP feed', transform_source=strip_jsonp) item = feed.get('channel', {}).get('item') if not item: - raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error'])) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error'])) video_id = item['guid'] def get_media_node(name, default=None): - media_name = 'media-%s' % name + media_name = f'media-{name}' media_group = item.get('media-group') or item return media_group.get(media_name) or item.get(media_name) or item.get(name, default) diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py index 5e78f372e46c..652154a4a821 100644 --- a/yt_dlp/extractor/anchorfm.py +++ b/yt_dlp/extractor/anchorfm.py @@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'release_date': '20230121', 'release_timestamp': 1674285179, 'episode_id': 'e1tpt3d', - } + 
}, }, { # embed url 'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd', @@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'season': 'Season 2', 'season_number': 2, 'episode_id': 'e1shjqd', - } + }, }] _WEBPAGE_TESTS = [{ @@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', 'uploader': 'Podcast Tempo', 'channel': 'apakatatempo', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py index 9f5b9b523e36..6800fe3d7fe3 100644 --- a/yt_dlp/extractor/angel.py +++ b/yt_dlp/extractor/angel.py @@ -15,8 +15,8 @@ class AngelIE(InfoExtractor): 'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons', 'description': 'md5:73b704897c20ab59c433a9c0a8202d5e', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', - 'duration': 1359.0 - } + 'duration': 1359.0, + }, }, { 'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name', 'md5': 'e4774bad0a5f0ad2e90d175cafdb797d', @@ -26,8 +26,8 @@ class AngelIE(InfoExtractor): 'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name', 'description': 'md5:aadfb4827a94415de5ff6426e6dee3be', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', - 'duration': 3276.0 - } + 'duration': 3276.0, + }, }] def _real_extract(self, url): @@ -44,7 +44,7 @@ def _real_extract(self, url): 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } # Angel uses cloudinary in the background and supports image transformations. 
diff --git a/yt_dlp/extractor/antenna.py b/yt_dlp/extractor/antenna.py index 2929d6550f80..b1a01791f688 100644 --- a/yt_dlp/extractor/antenna.py +++ b/yt_dlp/extractor/antenna.py @@ -105,7 +105,7 @@ def _real_extract(self, url): info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle') embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage)) if not embed_urls: - raise ExtractorError('no videos found for %s' % video_id, expected=True) + raise ExtractorError(f'no videos found for {video_id}', expected=True) return self.playlist_from_matches( embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(), video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index 0df50333c3e1..bf3d60b5ee5e 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -238,7 +238,7 @@ class AnvatoIE(InfoExtractor): 'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', 'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', 'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', - 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' + 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', } def _generate_nfl_token(self, anvack, mcp_id): @@ -255,7 +255,7 @@ def _generate_nfl_token(self, anvack, mcp_id): token } } -}''' % (anvack, mcp_id), +}''' % (anvack, mcp_id), # noqa: UP031 }).encode(), headers={ 'Authorization': auth_token, 'Content-Type': 'application/json', @@ -299,7 +299,7 @@ def _get_video_json(self, access_key, video_id, extracted_token): return self._download_json( video_data_url, video_id, transform_source=strip_jsonp, query=query, - data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8')) + data=json.dumps({'api': api}, separators=(',', ':')).encode()) def _get_anvato_videos(self, 
access_key, video_id, token): video_data = self._get_video_json(access_key, video_id, token) @@ -358,7 +358,7 @@ def _get_anvato_videos(self, access_key, video_id, token): for caption in video_data.get('captions', []): a_caption = { 'url': caption['url'], - 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None + 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None, } subtitles.setdefault(caption['language'], []).append(a_caption) subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs) diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 455f66795bc7..893dce7b0277 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -30,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # video with vidible ID 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', @@ -46,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', 'only_matching': True, @@ -83,10 +83,10 @@ def _real_extract(self, url): return self._extract_yahoo_video(video_id, 'us') response = self._download_json( - 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, + f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details', video_id)['response'] if response['statusText'] != 'Ok': - raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True) video_data = response['data'] formats = [] diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index 1ea0b1de45c9..fed597042ad0 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -34,7 +34,7 @@ def _real_extract(self, url): 
video_id, base_url = mobj.group('id', 'base_url') webpage = self._download_webpage( - '%s/player/%s' % (base_url, video_id), video_id) + f'{base_url}/player/{video_id}', video_id) jwplatform_id = self._search_regex( r'media[iI]d\s*:\s*["\'](?P[a-zA-Z0-9]{8})', webpage, @@ -47,7 +47,7 @@ def _real_extract(self, url): def extract(field, name=None): return self._search_regex( - r'\b%s["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' % field, + rf'\b{field}["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, name or field, default=None, group='value') title = extract('title') or video_id diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py index 49bbeab823eb..bd301e904a37 100644 --- a/yt_dlp/extractor/applepodcasts.py +++ b/yt_dlp/extractor/applepodcasts.py @@ -24,7 +24,7 @@ class ApplePodcastsIE(InfoExtractor): 'duration': 6454, 'series': 'The Tim Dillon Show', 'thumbnail': 're:.+[.](png|jpe?g|webp)', - } + }, }, { 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', 'only_matching': True, diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 21103aee5719..0a600f6df91a 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -1,8 +1,8 @@ import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( int_or_none, parse_duration, @@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor): 'uploader_id': 'wb', }, }, - ] + ], }, { 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', 'info_dict': { @@ -99,7 +99,7 @@ def _real_extract(self, url): webpage = self._download_webpage(url, movie) film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') film_data = self._download_json( - 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, + f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json', film_id, fatal=False) if film_data: 
@@ -114,7 +114,7 @@ def _real_extract(self, url): if not src: continue formats.append({ - 'format_id': '%s-%s' % (version, size), + 'format_id': f'{version}-{size}', 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), 'width': int_or_none(size_data.get('width')), 'height': int_or_none(size_data.get('height')), @@ -134,7 +134,7 @@ def _real_extract(self, url): page_data = film_data.get('page', {}) return self.playlist_result(entries, film_id, page_data.get('movie_title')) - playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') + playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc') def fix_html(s): s = re.sub(r'(?s).*?', '', s) @@ -143,10 +143,9 @@ def fix_html(s): # like: http://trailers.apple.com/trailers/wb/gravity/ def _clean_json(m): - return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') + return 'iTunes.playURL({});'.format(m.group(1).replace('\'', ''')) s = re.sub(self._JSON_RE, _clean_json, s) - s = '%s' % s - return s + return f'{s}' doc = self._download_xml(playlist_url, movie, transform_source=fix_html) playlist = [] @@ -170,18 +169,18 @@ def _clean_json(m): duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() - settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) + settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json') settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') formats = [] - for format in settings['metadata']['sizes']: + for fmt in settings['metadata']['sizes']: # The src is a file pointing to the real video file - format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) + format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src']) formats.append({ 'url': format_url, - 'format': format['type'], - 'width': int_or_none(format['width']), - 'height': int_or_none(format['height']), + 'format': fmt['type'], + 'width': 
int_or_none(fmt['width']), + 'height': int_or_none(fmt['height']), }) playlist.append({ @@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor): 'title': 'Movie Studios', }, } - _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P%s)' % '|'.join(_SECTIONS) + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P{})'.format('|'.join(_SECTIONS)) _TESTS = [{ 'url': 'http://trailers.apple.com/#section=justadded', 'info_dict': { @@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor): def _real_extract(self, url): section = self._match_id(url) section_data = self._download_json( - 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], + 'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']), section) entries = [ self.url_result('http://trailers.apple.com' + e['location']) diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 41f3a4ff2709..f5a55efc4ff1 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -1,10 +1,11 @@ +from __future__ import annotations + import json import re import urllib.parse from .common import InfoExtractor from .youtube import YoutubeBaseInfoExtractor, YoutubeIE -from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..networking.exceptions import HTTPError from ..utils import ( @@ -145,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'Bells Of Rostov', 'ext': 'mp3', }, - 'skip': 'restricted' + 'skip': 'restricted', }, { 'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3', 'md5': '1d0aabe03edca83ca58d9ed3b493a3c3', @@ -158,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor): 'description': 'md5:012b2d668ae753be36896f343d12a236', 'upload_date': '20190928', }, - 'skip': 
'restricted' + 'skip': 'restricted', }, { # Original formats are private 'url': 'https://archive.org/details/irelandthemakingofarepublic', @@ -202,8 +203,8 @@ class ArchiveOrgIE(InfoExtractor): 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'display_id': 'irelandthemakingofarepublicreel2.mov', }, - } - ] + }, + ], }] @staticmethod @@ -220,7 +221,7 @@ def _playlist_data(webpage): def _real_extract(self, url): video_id = urllib.parse.unquote_plus(self._match_id(url)) - identifier, entry_id = (video_id.split('/', 1) + [None])[:2] + identifier, _, entry_id = video_id.partition('/') # Archive.org metadata API doesn't clearly demarcate playlist entries # or subtitle tracks, so we get them from the embeddable player. @@ -246,7 +247,7 @@ def _real_extract(self, url): if track['kind'] != 'subtitles': continue entries[p['orig']][track['label']] = { - 'url': 'https://archive.org/' + track['file'].lstrip('/') + 'url': 'https://archive.org/' + track['file'].lstrip('/'), } metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier) @@ -293,7 +294,9 @@ def _real_extract(self, url): 'height': int_or_none(f.get('width')), 'filesize': int_or_none(f.get('size'))}) - extension = (f['name'].rsplit('.', 1) + [None])[1] + _, has_ext, extension = f['name'].rpartition('.') + if not has_ext: + extension = None # We don't want to skip private formats if the user has access to them, # however without access to an account with such privileges we can't implement/test this. 
@@ -308,7 +311,7 @@ def _real_extract(self, url): 'filesize': int_or_none(f.get('size')), 'protocol': 'https', 'source_preference': 0 if f.get('source') == 'original' else -1, - 'format_note': f.get('source') + 'format_note': f.get('source'), }) for entry in entries.values(): @@ -371,7 +374,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/Zeurel', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg', - } + }, }, { # Internal link 'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0', @@ -388,7 +391,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/1veritasium', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA', - } + }, }, { # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description. # Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description @@ -403,8 +406,8 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_id': 'machinima', 'uploader_url': 'https://www.youtube.com/user/machinima', 'thumbnail': r're:https?://.*\.(jpg|webp)', - 'uploader': 'machinima' - } + 'uploader': 'machinima', + }, }, { # FLV video. 
Video file URL does not provide itag information 'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw', @@ -421,7 +424,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'jawed', - } + }, }, { 'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA', 'info_dict': { @@ -437,7 +440,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/itsmadeon', 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w', 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # First capture is of dead video, second is the oldest from CDX response. 'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E', @@ -454,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'ETC News', - } + }, }, { # First capture of dead video, capture date in link links to dead capture. 'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E', @@ -473,15 +476,15 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader': 'ETC News', }, 'expected_warnings': [ - r'unable to download capture webpage \(it may not be archived\)' - ] + r'unable to download capture webpage \(it may not be archived\)', + ], }, { # Very old YouTube page, has - YouTube in title. 'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg', 'info_dict': { 'id': '-06-KB9XTzg', 'ext': 'flv', - 'title': 'New Coin Hack!! 100% Safe!!' - } + 'title': 'New Coin Hack!! 
100% Safe!!', + }, }, { 'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8', 'info_dict': { @@ -495,7 +498,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'DankPods', - } + }, }, { # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093 'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4', @@ -512,7 +515,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_id': 'PewDiePie', 'uploader_url': 'https://www.youtube.com/user/PewDiePie', 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # ~June 2010 Capture. swfconfig 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y', @@ -527,7 +530,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks', 'upload_date': '20090520', - } + }, }, { # Jan 2011: watch-video-date/eow-date surrounded by whitespace 'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc', @@ -542,7 +545,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'duration': 132, 'uploader_url': 'https://www.youtube.com/user/claybutlermusic', - } + }, }, { # ~May 2009 swfArgs. ytcfg is spread out over various vars 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY', @@ -557,7 +560,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'duration': 754, - } + }, }, { # ~June 2012. Upload date is in another lang so cannot extract. 
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA', @@ -571,7 +574,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader': 'BlackNerdComedy', 'duration': 182, 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # ~July 2013 'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM', @@ -587,7 +590,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ', 'upload_date': '20060428', 'uploader': 'punkybird', - } + }, }, { # April 2020: Player response in player config 'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en', @@ -604,7 +607,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341', 'uploader_url': 'https://www.youtube.com/user/GameGrumps', - } + }, }, { # watch7-user-header with yt-user-info 'url': 'ytarchive:kbh4T_b4Ixw:20160307085057', @@ -619,7 +622,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'upload_date': '20150503', 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA', - } + }, }, { # April 2012 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU', @@ -634,35 +637,35 @@ class YoutubeWebArchiveIE(InfoExtractor): 'duration': 200, 'upload_date': '20120407', 'uploader_id': 'thecomputernerd01', - } + }, }, { 'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M', - 'only_matching': True + 'only_matching': True, }, { # Video not archived, only capture is unavailable video page 'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10', - 'only_matching': True + 'only_matching': True, }, { # 
Encoded url 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg', - 'only_matching': True + 'only_matching': True, }, { 'url': 'ytarchive:BaW_jenozKc:20050214000000', - 'only_matching': True + 'only_matching': True, }, { 'url': 'ytarchive:BaW_jenozKc', - 'only_matching': True + 'only_matching': True, }, ] _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE @@ -673,13 +676,13 @@ class YoutubeWebArchiveIE(InfoExtractor): _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers _YT_ALL_THUMB_SERVERS = orderedSet( - _YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]]) + [*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]]) _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/' _OLDEST_CAPTURE_DATE = 20050214000000 _NEWEST_CAPTURE_DATE = 20500101000000 - def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False): + def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False): # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md query = { 'url': url, @@ -688,14 +691,14 @@ def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list 
= Non 'limit': 500, 'filter': ['statuscode:200'] + (filters or []), 'collapse': collapse or [], - **(query or {}) + **(query or {}), } res = self._download_json( 'https://web.archive.org/cdx/search/cdx', item_id, note or 'Downloading CDX API JSON', query=query, fatal=fatal) if isinstance(res, list) and len(res) >= 2: # format response to make it easier to use - return list(dict(zip(res[0], v)) for v in res[1:]) + return [dict(zip(res[0], v)) for v in res[1:]] elif not isinstance(res, list) or len(res) != 0: self.report_warning('Error while parsing CDX API response' + bug_reports_message()) @@ -852,7 +855,7 @@ def _extract_thumbnails(self, video_id): { 'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'), 'filesize': int_or_none(thumbnail_dict.get('length')), - 'preference': int_or_none(thumbnail_dict.get('length')) + 'preference': int_or_none(thumbnail_dict.get('length')), } for thumbnail_dict in response) if not try_all: break @@ -893,7 +896,7 @@ def _real_extract(self, url): for retry in retry_manager: try: urlh = self._request_webpage( - HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id), + HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'), video_id, note='Fetching archived video file url', expected_status=True) except ExtractorError as e: # HTTP Error 404 is expected if the video is not saved. 
@@ -924,21 +927,21 @@ def _real_extract(self, url): info['thumbnails'] = self._extract_thumbnails(video_id) if urlh: - url = compat_urllib_parse_unquote(urlh.url) + url = urllib.parse.unquote(urlh.url) video_file_url_qs = parse_qs(url) # Attempt to recover any ext & format info from playback url & response headers - format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} + fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} itag = try_get(video_file_url_qs, lambda x: x['itag'][0]) if itag and itag in YoutubeIE._formats: - format.update(YoutubeIE._formats[itag]) - format.update({'format_id': itag}) + fmt.update(YoutubeIE._formats[itag]) + fmt.update({'format_id': itag}) else: mime = try_get(video_file_url_qs, lambda x: x['mime'][0]) ext = (mimetype2ext(mime) or urlhandle_detect_ext(urlh) or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type'))) - format.update({'ext': ext}) - info['formats'] = [format] + fmt.update({'ext': ext}) + info['formats'] = [fmt] if not info.get('duration'): info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0])) diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index febd3d28a585..338bada7c89b 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -11,7 +11,7 @@ class ArcPublishingIE(InfoExtractor): _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' - _VALID_URL = r'arcpublishing:(?P[a-z]+):(?P%s)' % _UUID_REGEX + _VALID_URL = rf'arcpublishing:(?P[a-z]+):(?P{_UUID_REGEX})' _TESTS = [{ # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/ 'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab', @@ -74,12 +74,12 @@ class ArcPublishingIE(InfoExtractor): def _extract_embed_urls(cls, url, webpage): entries = [] # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview - for powa_el 
in re.findall(r'(]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage): + for powa_el in re.findall(rf'(]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage): powa = extract_attributes(powa_el) or {} org = powa.get('data-org') uuid = powa.get('data-uuid') if org and uuid: - entries.append('arcpublishing:%s:%s' % (org, uuid)) + entries.append(f'arcpublishing:{org}:{uuid}') return entries def _real_extract(self, url): @@ -122,7 +122,7 @@ def _real_extract(self, url): elif stream_type in ('ts', 'hls'): m3u8_formats = self._extract_m3u8_formats( s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False) - if all([f.get('acodec') == 'none' for f in m3u8_formats]): + if all(f.get('acodec') == 'none' for f in m3u8_formats): continue for f in m3u8_formats: height = f.get('height') @@ -136,7 +136,7 @@ def _real_extract(self, url): else: vbr = int_or_none(s.get('bitrate')) formats.append({ - 'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type, + 'format_id': f'{stream_type}-{vbr}' if vbr else stream_type, 'vbr': vbr, 'width': int_or_none(s.get('width')), 'height': int_or_none(s.get('height')), diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 3db59c5ca973..6fd641347933 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -85,7 +85,7 @@ def _extract_formats(self, media_info, video_id): formats.extend(self._extract_f4m_formats( update_url_query(stream_url, { 'hdcore': '3.1.1', - 'plugin': 'aasp-3.1.1.69.124' + 'plugin': 'aasp-3.1.1.69.124', }), video_id, f4m_id='hds', fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -96,12 +96,12 @@ def _extract_formats(self, media_info, video_id): f = { 'url': server, 'play_path': stream_url, - 'format_id': 'a%s-rtmp-%s' % (num, quality), + 'format_id': f'a{num}-rtmp-{quality}', } else: f = { 'url': stream_url, - 'format_id': 'a%s-%s-%s' % (num, ext, quality) + 'format_id': 
f'a{num}-{ext}-{quality}', } m = re.search( r'_(?P\d+)x(?P\d+)\.mp4$', diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py index de36ec8868bb..b0e853d57a9b 100644 --- a/yt_dlp/extractor/arkena.py +++ b/yt_dlp/extractor/arkena.py @@ -64,7 +64,7 @@ def _real_extract(self, url): raise ExtractorError('Invalid URL', expected=True) media = self._download_json( - 'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id), + f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}', video_id, query={ # https://video.qbrick.com/docs/api/examples/library-api.html 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags', diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index 9a5524aabed8..f196f611ab8e 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( float_or_none, format_field, @@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor): 'view_count': int, 'tags': ['linearna_algebra'], 'start_time': 10, - } + }, }, { 'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4', 'only_matching': True, @@ -93,6 +91,6 @@ def _real_extract(self, url): 'duration': float_or_none(video.get('duration'), 1000), 'view_count': int_or_none(video.get('views')), 'tags': video.get('hashtags'), - 'start_time': int_or_none(compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get('t', [None])[0]), + 'start_time': int_or_none(urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get('t', [None])[0]), } diff --git a/yt_dlp/extractor/art19.py b/yt_dlp/extractor/art19.py index 271c505daf79..deec7ad012ba 100644 --- a/yt_dlp/extractor/art19.py +++ 
b/yt_dlp/extractor/art19.py @@ -153,7 +153,7 @@ def _real_extract(self, url): 'series_id': ('series_id', {str}), 'timestamp': ('created_at', {parse_iso8601}), 'release_timestamp': ('released_at', {parse_iso8601}), - 'modified_timestamp': ('updated_at', {parse_iso8601}) + 'modified_timestamp': ('updated_at', {parse_iso8601}), })), **traverse_obj(rss_metadata, ('content', { 'title': ('episode_title', {str}), diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 46fe006cc93b..142d4b066b76 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -20,15 +20,15 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVIE(ArteTVBaseIE): - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) (?:https?:// (?: - (?:www\.)?arte\.tv/(?P%(langs)s)/videos| - api\.arte\.tv/api/player/v\d+/config/(?P%(langs)s) + (?:www\.)?arte\.tv/(?P{ArteTVBaseIE._ARTE_LANGUAGES})/videos| + api\.arte\.tv/api/player/v\d+/config/(?P{ArteTVBaseIE._ARTE_LANGUAGES}) ) |arte://program) - /(?P\d{6}-\d{3}-[AF]|LIVE) - ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES} + /(?P\d{{6}}-\d{{3}}-[AF]|LIVE) + ''' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', 'only_matching': True, @@ -145,7 +145,7 @@ def _real_extract(self, url): language_code = self._LANG_MAP.get(lang) config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={ - 'x-validated-age': '18' + 'x-validated-age': '18', }) geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {} @@ -247,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor): 'description': 'md5:be40b667f45189632b78c1425c7c2ce1', 'upload_date': '20201116', }, - 'skip': 'No video available' + 'skip': 'No video available', }, { 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', 'only_matching': True, @@ -262,7 +262,7 @@ def _real_extract(self, url): class ArteTVPlaylistIE(ArteTVBaseIE): - 
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P%s)/videos/(?PRC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES + _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?PRC-\d{{6}})' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'only_matching': True, @@ -298,7 +298,7 @@ def _real_extract(self, url): class ArteTVCategoryIE(ArteTVBaseIE): - _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P%s)/videos/(?P[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES + _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P[\w-]+(?:/[\w-]+)*)/?\s*$' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/politics-and-society/', 'info_dict': { @@ -312,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE): @classmethod def suitable(cls, url): return ( - not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, )) + not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE)) and super().suitable(url)) def _real_extract(self, url): @@ -321,12 +321,12 @@ def _real_extract(self, url): items = [] for video in re.finditer( - r']*?href\s*=\s*(?P"|\'|\b)(?Phttps?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang, + rf']*?href\s*=\s*(?P"|\'|\b)(?Phttps?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)', webpage): video = video.group('url') if video == url: continue - if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): + if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)): items.append(video) title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 3a44e5265b82..7c8139714f11 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -20,7 +20,7 @@ class AtresPlayerIE(InfoExtractor): 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', 'duration': 3413, }, - 'skip': 'This video is only available for registered users' + 'skip': 
'This video is only available for registered users', }, { 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', @@ -49,7 +49,7 @@ def _perform_login(self, username, password): target_url = self._download_json( 'https://account.atresmedia.com/api/login', None, 'Logging in', headers={ - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }, data=urlencode_postdata({ 'username': username, 'password': password, diff --git a/yt_dlp/extractor/atscaleconf.py b/yt_dlp/extractor/atscaleconf.py index 3f7b1e9f8d25..b219eeec5c94 100644 --- a/yt_dlp/extractor/atscaleconf.py +++ b/yt_dlp/extractor/atscaleconf.py @@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor): 'info_dict': { 'id': 'data-scale-spring-2022', 'title': 'Data @Scale Spring 2022', - 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', }, }, { 'url': 'https://atscaleconference.com/events/video-scale-2021/', @@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor): 'info_dict': { 'id': 'video-scale-2021', 'title': 'Video @Scale 2021', - 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', }, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) return self.playlist_from_matches( re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage), - ie='Generic', playlist_id=id, + ie='Generic', playlist_id=playlist_id, title=self._og_search_title(webpage), description=self._og_search_description(webpage)) diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index 20ee34cca76a..37bb616952c3 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py 
@@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor): 'id': 'v-ce9cgn1e70n5-1', 'ext': 'mp4', 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen', - } + }, }, { 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1', 'only_matching': True, @@ -66,10 +66,10 @@ def _real_extract(self, url): video_id=video_id) video_title = json_data['views']['default']['page']['title'] - contentResource = json_data['views']['default']['page']['contentResource'] - content_id = contentResource[0]['id'] - content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']} - for id, content in enumerate(contentResource)] + content_resource = json_data['views']['default']['page']['contentResource'] + content_id = content_resource[0]['id'] + content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']} + for id_, content in enumerate(content_resource)] time_of_request = dt.datetime.now() not_before = time_of_request - dt.timedelta(minutes=5) @@ -87,17 +87,17 @@ def _real_extract(self, url): videos = self._download_json( 'https://vas-v4.p7s1video.net/4.0/getsources', content_id, 'Downloading videos JSON', query={ - 'token': jwt_token.decode('utf-8') + 'token': jwt_token.decode('utf-8'), }) - video_id, videos_data = list(videos['data'].items())[0] + video_id, videos_data = next(iter(videos['data'].items())) error_msg = try_get(videos_data, lambda x: x['error']['title']) if error_msg == 'Geo check failed': self.raise_geo_restricted(error_msg) elif error_msg: raise ExtractorError(error_msg) entries = [ - self._extract_video_info(url, contentResource[video['id']], video) + self._extract_video_info(url, content_resource[video['id']], video) for video in videos_data] return { diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py index 35114e545568..c5a9c7e294a3 100644 --- a/yt_dlp/extractor/audimedia.py +++ b/yt_dlp/extractor/audimedia.py @@ -19,7 +19,7 
@@ class AudiMediaIE(InfoExtractor): 'timestamp': 1448354940, 'duration': 74022, 'view_count': int, - } + }, }, { 'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991', 'only_matching': True, @@ -73,7 +73,7 @@ def _real_extract(self, url): bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None) if bitrate: f.update({ - 'format_id': 'http-%s' % bitrate, + 'format_id': f'http-{bitrate}', }) formats.append(f) diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py index a23fcd299989..751b74add749 100644 --- a/yt_dlp/extractor/audioboom.py +++ b/yt_dlp/extractor/audioboom.py @@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor): 'duration': 4000.99, 'uploader': 'Sue Perkins: An hour or so with...', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', - } + }, }, { # Direct mp3-file link 'url': 'https://audioboom.com/posts/8128496.mp3', 'md5': 'e329edf304d450def95c7f86a9165ee1', @@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor): 'duration': 1689.7, 'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904', - } + }, }, { 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', 'only_matching': True, diff --git a/yt_dlp/extractor/audiodraft.py b/yt_dlp/extractor/audiodraft.py index 71e5afd8c8be..484ad4e1ab13 100644 --- a/yt_dlp/extractor/audiodraft.py +++ b/yt_dlp/extractor/audiodraft.py @@ -9,7 +9,7 @@ def _audiodraft_extract_from_id(self, player_entry_id): headers={ 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8', 'X-Requested-With': 'XMLHttpRequest', - }, data=f'id={player_entry_id}'.encode('utf-8')) + }, data=f'id={player_entry_id}'.encode()) return { 'id': str(data_json['entry_id']), @@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE): }] def _real_extract(self, url): - id = self._match_id(url) - 
webpage = self._download_webpage(url, id) - player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_entry_id = self._search_regex( + r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id') return self._audiodraft_extract_from_id(player_entry_id) @@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE): }] def _real_extract(self, url): - id = self._match_id(url) - return self._audiodraft_extract_from_id(f'player_entry_{id}') + video_id = self._match_id(url) + return self._audiodraft_extract_from_id(f'player_entry_{video_id}') diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py index 5c4160fe46e8..1d4460c9f819 100644 --- a/yt_dlp/extractor/audiomack.py +++ b/yt_dlp/extractor/audiomack.py @@ -3,7 +3,6 @@ from .common import InfoExtractor from .soundcloud import SoundcloudIE -from ..compat import compat_str from ..utils import ( ExtractorError, url_basename, @@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor): 'id': '310086', 'ext': 'mp3', 'uploader': 'Roosh Williams', - 'title': 'Extraordinary' - } + 'title': 'Extraordinary', + }, }, # audiomack wrapper around soundcloud song # Needs new test URL. 
@@ -56,7 +55,7 @@ def _real_extract(self, url): # API is inconsistent with errors if 'url' not in api_response or not api_response['url'] or 'error' in api_response: - raise ExtractorError('Invalid url %s' % url) + raise ExtractorError(f'Invalid url {url}') # Audiomack wraps a lot of soundcloud tracks in their branded wrapper # if so, pass the work off to the soundcloud extractor @@ -64,7 +63,7 @@ def _real_extract(self, url): return self.url_result(api_response['url'], SoundcloudIE.ie_key()) return { - 'id': compat_str(api_response.get('id', album_url_tag)), + 'id': str(api_response.get('id', album_url_tag)), 'uploader': api_response.get('artist'), 'title': api_response.get('title'), 'url': api_response['url'], @@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor): 'info_dict': { 'id': '812251', - 'title': 'Tha Tour: Part 2 (Official Mixtape)' - } + 'title': 'Tha Tour: Part 2 (Official Mixtape)', + }, }, # Album playlist ripped from fakeshoredrive with no metadata { @@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor): 'id': '837576', 'ext': 'mp3', 'uploader': 'Lil Herb a.k.a. G Herbo', - } + }, }, { 'info_dict': { 'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)', 'id': '837580', 'ext': 'mp3', 'uploader': 'Lil Herb a.k.a. 
G Herbo', - } + }, }], - } + }, ] def _real_extract(self, url): @@ -123,12 +122,12 @@ def _real_extract(self, url): api_response = self._download_json( 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d' % (album_url_tag, track_no, time.time()), album_url_tag, - note='Querying song information (%d)' % (track_no + 1)) + note=f'Querying song information ({track_no + 1})') # Total failure, only occurs when url is totally wrong # Won't happen in middle of valid playlist (next case) if 'url' not in api_response or 'error' in api_response: - raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url)) + raise ExtractorError(f'Invalid url for track {track_no} of album url {url}') # URL is good but song id doesn't exist - usually means end of playlist elif not api_response['url']: break @@ -136,10 +135,10 @@ def _real_extract(self, url): # Pull out the album metadata and add to result (if it exists) for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: if apikey in api_response and resultkey not in result: - result[resultkey] = compat_str(api_response[apikey]) + result[resultkey] = str(api_response[apikey]) song_id = url_basename(api_response['url']).rpartition('.')[0] result['entries'].append({ - 'id': compat_str(api_response.get('id', song_id)), + 'id': str(api_response.get('id', song_id)), 'uploader': api_response.get('artist'), 'title': api_response.get('title', song_id), 'url': api_response['url'], diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py index 6448b449b90f..c611c6e0815a 100644 --- a/yt_dlp/extractor/audius.py +++ b/yt_dlp/extractor/audius.py @@ -1,7 +1,7 @@ import random +import urllib.parse from .common import InfoExtractor -from ..compat import compat_str, compat_urllib_parse_unquote from ..utils import ExtractorError, str_or_none, try_get @@ -15,13 +15,13 @@ def _get_response_data(self, response): if response_data is not None: return response_data if len(response) == 1 and 
'message' in response: - raise ExtractorError('API error: %s' % response['message'], + raise ExtractorError('API error: {}'.format(response['message']), expected=True) raise ExtractorError('Unexpected API response') def _select_api_base(self): """Selecting one of the currently available API hosts""" - response = super(AudiusBaseIE, self)._download_json( + response = super()._download_json( 'https://api.audius.co/', None, note='Requesting available API hosts', errnote='Unable to request available API hosts') @@ -41,8 +41,8 @@ def _prepare_url(url, title): anything from this link, since the Audius API won't be able to resolve this url """ - url = compat_urllib_parse_unquote(url) - title = compat_urllib_parse_unquote(title) + url = urllib.parse.unquote(url) + title = urllib.parse.unquote(title) if '/' in title or '%2F' in title: fixed_title = title.replace('/', '%5C').replace('%2F', '%5C') return url.replace(title, fixed_title) @@ -54,19 +54,19 @@ def _api_request(self, path, item_id=None, note='Downloading JSON metadata', if self._API_BASE is None: self._select_api_base() try: - response = super(AudiusBaseIE, self)._download_json( - '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note, + response = super()._download_json( + f'{self._API_BASE}{self._API_V}{path}', item_id, note=note, errnote=errnote, expected_status=expected_status) except ExtractorError as exc: # some of Audius API hosts may not work as expected and return HTML - if 'Failed to parse JSON' in compat_str(exc): + if 'Failed to parse JSON' in str(exc): raise ExtractorError('An error occurred while receiving data. 
Try again', expected=True) raise exc return self._get_response_data(response) def _resolve_url(self, url, item_id): - return self._api_request('/resolve?url=%s' % url, item_id, + return self._api_request(f'/resolve?url={url}', item_id, expected_status=404) @@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }, { # Regular track @@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }, ] _ARTWORK_MAP = { - "150x150": 150, - "480x480": 480, - "1000x1000": 1000 + '150x150': 150, + '480x480': 480, + '1000x1000': 1000, } def _real_extract(self, url): @@ -130,7 +130,7 @@ def _real_extract(self, url): else: # API link title = None # uploader = None - track_data = self._api_request('/tracks/%s' % track_id, track_id) + track_data = self._api_request(f'/tracks/{track_id}', track_id) if not isinstance(track_data, dict): raise ExtractorError('Unexpected API response') @@ -144,7 +144,7 @@ def _real_extract(self, url): if isinstance(artworks_data, dict): for quality_key, thumbnail_url in artworks_data.items(): thumbnail = { - "url": thumbnail_url + 'url': thumbnail_url, } quality_code = self._ARTWORK_MAP.get(quality_key) if quality_code is not None: @@ -154,12 +154,12 @@ def _real_extract(self, url): return { 'id': track_id, 'title': track_data.get('title', title), - 'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id), + 'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream', 'ext': 'mp3', 'description': track_data.get('description'), 'duration': track_data.get('duration'), 'track': track_data.get('title'), - 'artist': try_get(track_data, lambda x: x['user']['name'], compat_str), + 'artist': try_get(track_data, lambda x: x['user']['name'], str), 'genre': track_data.get('genre'), 'thumbnails': thumbnails, 'view_count': track_data.get('play_count'), @@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE 
_TESTS = [ { 'url': 'audius:9RWlo', - 'only_matching': True + 'only_matching': True, }, { 'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo', - 'only_matching': True + 'only_matching': True, }, ] @@ -207,7 +207,7 @@ def _build_playlist(self, tracks): if not track_id: raise ExtractorError('Unable to get track ID from playlist') entries.append(self.url_result( - 'audius:%s' % track_id, + f'audius:{track_id}', ie=AudiusTrackIE.ie_key(), video_id=track_id)) return entries @@ -231,7 +231,7 @@ def _real_extract(self, url): raise ExtractorError('Unable to get playlist ID') playlist_tracks = self._api_request( - '/playlists/%s/tracks' % playlist_id, + f'/playlists/{playlist_id}/tracks', title, note='Downloading playlist tracks metadata', errnote='Unable to download playlist tracks metadata') if not isinstance(playlist_tracks, list): @@ -267,5 +267,5 @@ def _real_extract(self, url): profile_audius_id = _profile_data[0]['id'] profile_bio = _profile_data[0].get('bio') - api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id) + api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id) return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio) diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index a8dfb3efcc38..4066a5a83f2a 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -1,10 +1,7 @@ import base64 +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( format_field, int_or_none, @@ -22,14 +19,14 @@ def _real_extract(self, url): show_id, video_id, season_id = self._match_valid_url(url).groups() if video_id and int(video_id) > 0: return self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') + f'http://awaan.ae/media/{video_id}', 'AWAANVideo') elif season_id and int(season_id) > 0: 
return self.url_result(smuggle_url( - 'http://awaan.ae/program/season/%s' % season_id, + f'http://awaan.ae/program/season/{season_id}', {'show_id': show_id}), 'AWAANSeason') else: return self.url_result( - 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') + f'http://awaan.ae/program/{show_id}', 'AWAANSeason') class AWAANBaseIE(InfoExtractor): @@ -75,11 +72,11 @@ def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}', video_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(video_data, video_id, False) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({ 'id': video_data['id'], 'user_id': video_data['user_id'], 'signature': video_data['signature'], @@ -117,11 +114,11 @@ def _real_extract(self, url): channel_id = self._match_id(url) channel_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}', channel_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(channel_data, channel_id, True) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ urllib.parse.urlencode({ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), 'signature': channel_data['signature'], @@ -159,7 +156,7 @@ def _real_extract(self, url): show_id = smuggled_data.get('show_id') if show_id is None: season = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}', season_id, headers={'Origin': 'http://awaan.ae'}) show_id = season['id'] data['show_id'] = show_id @@ -167,7 +164,7 @@ def _real_extract(self, url): 'http://admin.mangomolo.com/analytics/index.php/plus/show', show_id, data=urlencode_postdata(data), headers={ 'Origin': 'http://awaan.ae', - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }) if not season_id: season_id = show['default_season'] @@ -177,8 +174,8 @@ def _real_extract(self, url): entries = [] for video in show['videos']: - video_id = compat_str(video['id']) + video_id = str(video['id']) entries.append(self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) + f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id)) return self.playlist_result(entries, season_id, title) diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py index 4ebef9295739..177c41027558 100644 --- a/yt_dlp/extractor/aws.py +++ b/yt_dlp/extractor/aws.py @@ -1,9 +1,9 @@ import datetime as dt import hashlib import hmac +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor @@ -18,20 +18,20 @@ def _aws_execute_api(self, aws_dict, video_id, query=None): 'Accept': 'application/json', 'Host': self._AWS_PROXY_HOST, 'X-Amz-Date': amz_date, - 'X-Api-Key': self._AWS_API_KEY + 'X-Api-Key': 
self._AWS_API_KEY, } session_token = aws_dict.get('session_token') if session_token: headers['X-Amz-Security-Token'] = session_token def aws_hash(s): - return hashlib.sha256(s.encode('utf-8')).hexdigest() + return hashlib.sha256(s.encode()).hexdigest() # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - canonical_querystring = compat_urllib_parse_urlencode(query) + canonical_querystring = urllib.parse.urlencode(query) canonical_headers = '' for header_name, header_value in sorted(headers.items()): - canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) + canonical_headers += f'{header_name.lower()}:{header_value}\n' signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) canonical_request = '\n'.join([ 'GET', @@ -39,7 +39,7 @@ def aws_hash(s): canonical_querystring, canonical_headers, signed_headers, - aws_hash('') + aws_hash(''), ]) # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html @@ -49,7 +49,7 @@ def aws_hash(s): # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html def aws_hmac(key, msg): - return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) + return hmac.new(key, msg.encode(), hashlib.sha256) def aws_hmac_digest(key, msg): return aws_hmac(key, msg).digest() @@ -57,7 +57,7 @@ def aws_hmac_digest(key, msg): def aws_hmac_hexdigest(key, msg): return aws_hmac(key, msg).hexdigest() - k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8') + k_signing = ('AWS4' + aws_dict['secret_key']).encode() for value in credential_scope_list: k_signing = aws_hmac_digest(k_signing, value) @@ -65,11 +65,11 @@ def aws_hmac_hexdigest(key, msg): # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html headers['Authorization'] = ', '.join([ - '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), - 'SignedHeaders=%s' % signed_headers, - 'Signature=%s' % 
signature, + '{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), + f'SignedHeaders={signed_headers}', + f'Signature={signature}', ]) return self._download_json( - 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), + 'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), video_id, headers=headers) diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py index d1686eed64be..0e3a03f03fb9 100644 --- a/yt_dlp/extractor/azmedien.py +++ b/yt_dlp/extractor/azmedien.py @@ -38,14 +38,14 @@ class AZMedienIE(InfoExtractor): 'timestamp': 1538328802, 'view_count': int, 'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031', - 'duration': 1930 + 'duration': 1930, }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', - 'only_matching': True + 'only_matching': True, }] _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be' _PARTNER_ID = '1719221' @@ -62,5 +62,5 @@ def _real_extract(self, url): })['data']['context']['mainAsset']['video']['kaltura']['kalturaId'] return self.url_result( - 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id), + f'kaltura:{self._PARTNER_ID}:{entry_id}', ie=KalturaIE.ie_key(), video_id=entry_id) diff --git a/yt_dlp/extractor/baidu.py b/yt_dlp/extractor/baidu.py index 8786d67e06dc..a1ad4240ffd5 100644 --- a/yt_dlp/extractor/baidu.py +++ b/yt_dlp/extractor/baidu.py @@ -24,8 +24,9 @@ class BaiduVideoIE(InfoExtractor): }] def _call_api(self, path, category, playlist_id, note): - return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % ( - path, category, playlist_id), playlist_id, note) + return self._download_json( + 
f'http://app.video.baidu.com/{path}/?worktype=adnative{category}&id={playlist_id}', + playlist_id, note) def _real_extract(self, url): category, playlist_id = self._match_valid_url(url).groups() @@ -44,7 +45,7 @@ def _real_extract(self, url): 'xqsingle', category, playlist_id, 'Download episodes JSON metadata') entries = [self.url_result( - episode['url'], video_title=episode['title'] + episode['url'], video_title=episode['title'], ) for episode in episodes_detail['videos']] return self.playlist_result( diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index c4e07a79a844..d10bdf8daafa 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -1,10 +1,7 @@ import math +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( InAdvancePagedList, format_field, @@ -20,8 +17,8 @@ class BanByeBaseIE(InfoExtractor): @staticmethod def _extract_playlist_id(url, param='playlist'): - return compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get(param, [None])[0] + return urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get(param, [None])[0] def _extract_playlist(self, playlist_id): data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index e89b3a69b3e8..6128de791b09 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -3,7 +3,6 @@ import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, @@ -42,7 +41,7 @@ class BandcampIE(InfoExtractor): 'uploader_id': 'youtube-dl', 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', }, - '_skip': 'There is a limit of 200 free downloads / month for the test song' + '_skip': 'There is a limit of 200 free downloads / month for the test song', }, { # free download 'url': 
'http://benprunty.bandcamp.com/track/lanius-battle', @@ -119,7 +118,7 @@ class BandcampIE(InfoExtractor): def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True): return self._parse_json(self._html_search_regex( - r'data-%s=(["\'])({.+?})\1' % attr, webpage, + rf'data-{attr}=(["\'])({{.+?}})\1', webpage, attr + ' data', group=2), video_id, fatal=fatal) def _real_extract(self, url): @@ -167,7 +166,7 @@ def _real_extract(self, url): download_link = tralbum.get('freeDownloadPage') if download_link: - track_id = compat_str(tralbum['id']) + track_id = str(tralbum['id']) download_webpage = self._download_webpage( download_link, track_id, 'Downloading free downloads page') @@ -192,7 +191,7 @@ def _real_extract(self, url): if isinstance(download_formats_list, list): for f in blob['download_formats']: name, ext = f.get('name'), f.get('file_extension') - if all(isinstance(x, compat_str) for x in (name, ext)): + if all(isinstance(x, str) for x in (name, ext)): download_formats[name] = ext.strip('.') for format_id, f in downloads.items(): @@ -207,7 +206,7 @@ def _real_extract(self, url): }) format_id = f.get('encoding_name') or format_id stat = self._download_json( - stat_url, track_id, 'Downloading %s JSON' % format_id, + stat_url, track_id, f'Downloading {format_id} JSON', transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], fatal=False) if not stat: @@ -225,7 +224,7 @@ def _real_extract(self, url): 'acodec': format_id.split('-')[0], }) - title = '%s - %s' % (artist, track) if artist else track + title = f'{artist} - {track}' if artist else track if not duration: duration = float_or_none(self._html_search_meta( @@ -267,7 +266,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'timestamp': 1311756226, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, { 'md5': '1a2c32e2691474643e912cc6cd4bffaa', @@ -278,7 +277,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'timestamp': 
1311757238, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, ], 'info_dict': { @@ -287,9 +286,9 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'blazo', }, 'params': { - 'playlistend': 2 + 'playlistend': 2, }, - 'skip': 'Bandcamp imposes download limits.' + 'skip': 'Bandcamp imposes download limits.', }, { 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', 'info_dict': { @@ -324,7 +323,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE def suitable(cls, url): return (False if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) - else super(BandcampAlbumIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): uploader_id, album_id = self._match_valid_url(url).groups() @@ -376,7 +375,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE }, }, { 'url': 'https://bandcamp.com/?blah/blah@&show=228', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -407,7 +406,7 @@ def _real_extract(self, url): title = show.get('audio_title') or 'Bandcamp Weekly' subtitle = show.get('subtitle') if subtitle: - title += ' - %s' % subtitle + title += f' - {subtitle}' return { 'id': show_id, @@ -419,7 +418,7 @@ def _real_extract(self, url): 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), 'episode_id': show_id, - 'formats': formats + 'formats': formats, } @@ -440,7 +439,7 @@ class BandcampUserIE(InfoExtractor): 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'id': 'dotscale', - 'title': 'Discography of dotscale' + 'title': 'Discography of dotscale', }, 'playlist_count': 1, }, { diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 82dc9ab025e7..46f2978f7fbd 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -23,7 +23,7 @@ class BannedVideoIE(InfoExtractor): 'description': 
'md5:560d96f02abbebe6c6b78b47465f6b28', 'upload_date': '20200324', 'timestamp': 1585087895, - } + }, }] _GRAPHQL_GETMETADATA_QUERY = ''' @@ -84,15 +84,15 @@ class BannedVideoIE(InfoExtractor): 'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY, } - def _call_api(self, video_id, id, operation, note): + def _call_api(self, video_id, id_var, operation, note): return self._download_json( 'https://api.infowarsmedia.com/graphql', video_id, note=note, headers={ - 'Content-Type': 'application/json; charset=utf-8' + 'Content-Type': 'application/json; charset=utf-8', }, data=json.dumps({ - 'variables': {'id': id}, + 'variables': {'id': id_var}, 'operationName': operation, - 'query': self._GRAPHQL_QUERIES[operation] + 'query': self._GRAPHQL_QUERIES[operation], }).encode('utf8')).get('data') def _get_comments(self, video_id, comments, comment_data): @@ -151,5 +151,5 @@ def _real_extract(self, url): 'tags': [tag.get('name') for tag in video_info.get('tags')], 'availability': self._availability(is_unlisted=video_info.get('unlisted')), 'comments': comments, - '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')) + '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')), } diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index f6b58b361f87..3af923f9584d 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -2,10 +2,10 @@ import itertools import json import re +import urllib.parse import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import compat_str, compat_urlparse from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -35,7 +35,7 @@ class BBCCoUkIE(InfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})' - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) https?:// (?:www\.)?bbc\.co\.uk/ (?: @@ -45,8 +45,8 @@ class BBCCoUkIE(InfoExtractor): 
radio/player/| events/[^/]+/play/[^/]+/ ) - (?P%s)(?!/(?:episodes|broadcasts|clips)) - ''' % _ID_REGEX + (?P{_ID_REGEX})(?!/(?:episodes|broadcasts|clips)) + ''' _EMBED_REGEX = [r'setPlaylist\("(?Phttps?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)'] _LOGIN_URL = 'https://account.bbc.com/signin' @@ -75,7 +75,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/', @@ -148,7 +148,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz', 'note': 'Video', @@ -162,7 +162,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls', 'info_dict': { @@ -268,19 +268,19 @@ def _perform_login(self, username, password): error = clean_html(get_element_by_class('form-message', response)) if error: raise ExtractorError( - 'Unable to login: %s' % error, expected=True) + f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') class MediaSelectionError(Exception): - def __init__(self, id): - self.id = id + def __init__(self, error_id): + self.id = error_id def _extract_asx_playlist(self, connection, programme_id): asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') return [ref.get('href') for ref in asx.findall('./Entry/ref')] def _extract_items(self, playlist): - return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS) + return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item') def _extract_medias(self, media_selection): error = media_selection.get('result') @@ -312,7 +312,7 @@ def _get_subtitles(self, media, programme_id): def _raise_extractor_error(self, media_selection_error): raise ExtractorError( - '%s returned error: %s' 
% (self.IE_NAME, media_selection_error.id), + f'{self.IE_NAME} returned error: {media_selection_error.id}', expected=True) def _download_media_selector(self, programme_id): @@ -372,7 +372,7 @@ def _process_media_selector(self, media_selection, programme_id): for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): formats.append({ 'url': ref, - 'format_id': 'ref%s_%s' % (i, format_id), + 'format_id': f'ref{i}_{format_id}', }) elif transfer_format == 'dash': formats.extend(self._extract_mpd_formats( @@ -394,7 +394,7 @@ def _process_media_selector(self, media_selection, programme_id): href, programme_id, f4m_id=format_id, fatal=False)) else: if not supplier and bitrate: - format_id += '-%d' % bitrate + format_id += f'-{bitrate}' fmt = { 'format_id': format_id, 'filesize': file_size, @@ -423,9 +423,9 @@ def _process_media_selector(self, media_selection, programme_id): identifier = connection.get('identifier') server = connection.get('server') fmt.update({ - 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), + 'url': f'{protocol}://{server}/{application}?{auth_string}', 'play_path': identifier, - 'app': '%s?%s' % (application, auth_string), + 'app': f'{application}?{auth_string}', 'page_url': 'http://www.bbc.co.uk', 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', 'rtmp_live': False, @@ -441,7 +441,7 @@ def _process_media_selector(self, media_selection, programme_id): def _download_playlist(self, playlist_id): try: playlist = self._download_json( - 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id, + f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json', playlist_id, 'Downloading playlist JSON') formats = [] subtitles = {} @@ -480,32 +480,32 @@ def _process_legacy_playlist_url(self, url, display_id): def _process_legacy_playlist(self, playlist_id): return self._process_legacy_playlist_url( - 'http://www.bbc.co.uk/iplayer/playlist/%s' % 
playlist_id, playlist_id) + f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id) def _download_legacy_playlist_url(self, url, playlist_id=None): return self._download_xml( url, playlist_id, 'Downloading legacy playlist XML') def _extract_from_legacy_playlist(self, playlist, playlist_id): - no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS) + no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems') if no_items is not None: reason = no_items.get('reason') if reason == 'preAvailability': - msg = 'Episode %s is not yet available' % playlist_id + msg = f'Episode {playlist_id} is not yet available' elif reason == 'postAvailability': - msg = 'Episode %s is no longer available' % playlist_id + msg = f'Episode {playlist_id} is no longer available' elif reason == 'noMedia': - msg = 'Episode %s is not currently available' % playlist_id + msg = f'Episode {playlist_id} is not currently available' else: - msg = 'Episode %s is not available: %s' % (playlist_id, reason) + msg = f'Episode {playlist_id} is not available: {reason}' raise ExtractorError(msg, expected=True) for item in self._extract_items(playlist): kind = item.get('kind') if kind not in ('programme', 'radioProgramme'): continue - title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text - description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS) + title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text + description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary') description = description_el.text if description_el is not None else None def get_programme_id(item): @@ -515,7 +515,7 @@ def get_from_attributes(item): if value and re.match(r'^[pb][\da-z]{7}$', value): return value get_from_attributes(item) - mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS) + mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator') if mediator is not None: return get_from_attributes(mediator) @@ -555,7 +555,7 @@ def _real_extract(self, 
url): if not programme_id: programme_id = self._search_regex( - r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None) + rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None) if programme_id: formats, subtitles = self._download_media_selector(programme_id) @@ -641,7 +641,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE }, 'params': { 'skip_download': True, - } + }, }, { # article with single video embedded with data-playable containing XML playlist # with direct video links as progressiveDownloadUrl (for now these are extracted) @@ -884,7 +884,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'bbc_world_service', 'series': 'CrowdScience', 'chapters': [], - } + }, }, { # onion routes 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576', 'only_matching': True, @@ -897,7 +897,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE def suitable(cls, url): EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE) return (False if any(ie.suitable(url) for ie in EXCLUDE_IE) - else super(BBCIE, cls).suitable(url)) + else super().suitable(url)) def _extract_from_media_meta(self, media_meta, video_id): # Direct links to media in media metadata (e.g. 
@@ -1009,7 +1009,7 @@ def _real_extract(self, url): if playlist: entry = None for key in ('streaming', 'progressiveDownload'): - playlist_url = playlist.get('%sUrl' % key) + playlist_url = playlist.get(f'{key}Url') if not playlist_url: continue try: @@ -1035,7 +1035,7 @@ def _real_extract(self, url): # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227 group_id = self._search_regex( - r']+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, + rf']+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})', webpage, 'group id', default=None) if group_id: return self.url_result( @@ -1043,9 +1043,9 @@ def _real_extract(self, url): # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex( - [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, - r']+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX, - r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX], + [rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"', + rf']+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"', + rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'], webpage, 'vpid', default=None) if programme_id: @@ -1142,7 +1142,7 @@ def _real_extract(self, url): video_id, url_transparent=True) entry.update({ 'timestamp': traverse_obj(morph_payload, ( - 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}) + 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}), ), **traverse_obj(video_data, { 'thumbnail': (('iChefImage', 'image'), {url_or_none}, any), @@ -1189,7 +1189,7 @@ def _real_extract(self, url): 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}), 'start_time': ('offset', 'start', {float_or_none}), 'end_time': ('offset', 'end', {float_or_none}), - }) + }), ), } @@ -1287,7 +1287,7 @@ def parse_model(model): 'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any), 'duration': 
('versions', 0, 'duration', {int}), 'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}), - }) + }), } def is_type(*types): @@ -1331,7 +1331,7 @@ def parse_media(media): if blocks: summary = [] for block in blocks: - text = try_get(block, lambda x: x['model']['text'], compat_str) + text = try_get(block, lambda x: x['model']['text'], str) if text: summary.append(text) if summary: @@ -1411,9 +1411,9 @@ def parse_media(media): entries, playlist_id, playlist_title, playlist_description) def extract_all(pattern): - return list(filter(None, map( - lambda s: self._parse_json(s, playlist_id, fatal=False), - re.findall(pattern, webpage)))) + return list(filter(None, ( + self._parse_json(s, playlist_id, fatal=False) + for s in re.findall(pattern, webpage)))) # US accessed article with single embedded video (e.g. # https://www.bbc.com/news/uk-68546268) @@ -1435,14 +1435,14 @@ def extract_all(pattern): # Multiple video article (e.g. # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460) - EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX + EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?' 
entries = [] for match in extract_all(r'new\s+SMP\(({.+?})\)'): embed_url = match.get('playerSettings', {}).get('externalEmbedUrl') if embed_url and re.match(EMBED_URL, embed_url): entries.append(embed_url) entries.extend(re.findall( - r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage)) + rf'setPlaylist\("({EMBED_URL})"\)', webpage)) if entries: return self.playlist_result( [self.url_result(entry_, 'BBCCoUk') for entry_ in entries], @@ -1492,11 +1492,11 @@ def extract_all(pattern): video_id = media_meta.get('externalId') if not video_id: - video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num) + video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}' title = media_meta.get('caption') if not title: - title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num) + title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}' duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration')) @@ -1557,8 +1557,8 @@ def _real_extract(self, url): class BBCCoUkPlaylistBaseIE(InfoExtractor): def _entries(self, webpage, url, playlist_id): - single_page = 'page' in compat_urlparse.parse_qs( - compat_urlparse.urlparse(url).query) + single_page = 'page' in urllib.parse.parse_qs( + urllib.parse.urlparse(url).query) for page_num in itertools.count(2): for video_id in re.findall( self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage): @@ -1572,8 +1572,8 @@ def _entries(self, webpage, url, playlist_id): if not next_page: break webpage = self._download_webpage( - compat_urlparse.urljoin(url, next_page), playlist_id, - 'Downloading page %d' % page_num, page_num) + urllib.parse.urljoin(url, next_page), playlist_id, + f'Downloading page {page_num}', page_num) def _real_extract(self, url): playlist_id = self._match_id(url) @@ -1588,7 +1588,7 @@ def _real_extract(self, url): class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor): - _VALID_URL_TMPL = 
r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P%s)' % BBCCoUkIE._ID_REGEX + _VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P{BBCCoUkIE._ID_REGEX})' @staticmethod def _get_default(episode, key, default_key='default'): @@ -1712,11 +1712,11 @@ def _call_api(self, pid, per_page, page=1, series_id=None): variables['sliceId'] = series_id return self._download_json( 'https://graph.ibl.api.bbc.co.uk/', pid, headers={ - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', }, data=json.dumps({ 'id': '5692d93d5aac8d796a0305e895e61551', 'variables': variables, - }).encode('utf-8'))['data']['programme'] + }).encode())['data']['programme'] @staticmethod def _get_playlist_data(data): @@ -1776,7 +1776,7 @@ def _get_episode(element): def _call_api(self, pid, per_page, page=1, series_id=None): return self._download_json( - 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid, + f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes', pid, query={ 'page': page, 'per_page': per_page, @@ -1792,7 +1792,7 @@ def _get_playlist_title(self, data): class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): IE_NAME = 'bbc.co.uk:playlist' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX + _VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)' _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s' _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)' _TESTS = [{ diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index 0aecbd089d33..acc8d125950f 100644 --- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import int_or_none @@ -33,7 +32,7 @@ class BeatportIE(InfoExtractor): 'display_id': 'birds-original-mix', 'ext': 'mp4', 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)", - } + }, }] 
def _real_extract(self, url): @@ -51,7 +50,7 @@ def _real_extract(self, url): track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) - title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] + title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name'] if track['mix']: title += ' (' + track['mix'] + ')' @@ -89,7 +88,7 @@ def _real_extract(self, url): images.append(image) return { - 'id': compat_str(track.get('id')) or track_id, + 'id': str(track.get('id')) or track_id, 'display_id': track.get('slug') or display_id, 'title': title, 'formats': formats, diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index da98ac314047..960cdfabdd3f 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -23,7 +23,7 @@ class BeegIE(InfoExtractor): 'upload_date': '20220131', 'timestamp': 1643656455, 'display_id': '2540839', - } + }, }, { 'url': 'https://beeg.com/-0599050563103750?t=4-861', 'md5': 'bd8b5ea75134f7f07fad63008db2060e', @@ -38,7 +38,7 @@ class BeegIE(InfoExtractor): 'timestamp': 1643623200, 'display_id': '2569965', 'upload_date': '20220131', - } + }, }, { # api/v6 v2 'url': 'https://beeg.com/1941093077?t=911-1391', @@ -55,8 +55,8 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) video = self._download_json( - 'https://store.externulls.com/facts/file/%s' % video_id, - video_id, 'Downloading JSON for %s' % video_id) + f'https://store.externulls.com/facts/file/{video_id}', + video_id, f'Downloading JSON for {video_id}') fc_facts = video.get('fc_facts') first_fact = {} diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py index 9d2324f4f4e9..45f45d03ba43 100644 --- a/yt_dlp/extractor/behindkink.py +++ b/yt_dlp/extractor/behindkink.py @@ -16,7 +16,7 @@ class BehindKinkIE(InfoExtractor): 'upload_date': '20141205', 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', 'age_limit': 18, - } + }, } def 
_real_extract(self, url): diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py index 677680b428f2..ac45dd477975 100644 --- a/yt_dlp/extractor/bellmedia.py +++ b/yt_dlp/extractor/bellmedia.py @@ -86,6 +86,6 @@ def _real_extract(self, url): return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), + 'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}', 'ie_key': 'NineCNineMedia', } diff --git a/yt_dlp/extractor/berufetv.py b/yt_dlp/extractor/berufetv.py index 8160cbd9a73f..5bba33a44c7e 100644 --- a/yt_dlp/extractor/berufetv.py +++ b/yt_dlp/extractor/berufetv.py @@ -16,7 +16,7 @@ class BerufeTVIE(InfoExtractor): 'tags': ['Studienfilm'], 'duration': 602.440, 'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$', - } + }, }] def _real_extract(self, url): @@ -54,7 +54,7 @@ def _real_extract(self, url): subtitles.setdefault(track['language'], []).append({ 'url': track['source'], 'name': track.get('label'), - 'ext': 'vtt' + 'ext': 'vtt', }) return { diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py index cbf3dd0824e7..3a8e7430921e 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -19,7 +19,7 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download @@ -39,16 +39,16 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download 'skip_download': True, }, - } + }, ] - _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player' def _get_feed_query(self, uri): return { diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index c4621ca82620..87f011783bfe 
100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -98,8 +98,8 @@ class BFMTVArticleIE(BFMTVBaseIE): 'timestamp': 1673341692, 'duration': 109.269, 'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'], - 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg' - } + 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 02d1ba0e3f64..9c55bb9682ef 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -1,10 +1,8 @@ +import base64 import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, -) class BigflixIE(InfoExtractor): @@ -21,7 +19,7 @@ class BigflixIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { # multiple formats 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967', @@ -38,7 +36,7 @@ def _real_extract(self, url): webpage, 'title') def decode_url(quoted_b64_url): - return compat_b64decode(compat_urllib_parse_unquote( + return base64.b64decode(urllib.parse.unquote( quoted_b64_url)).decode('utf-8') formats = [] @@ -47,7 +45,7 @@ def decode_url(quoted_b64_url): video_url = decode_url(encoded_url) f = { 'url': video_url, - 'format_id': '%sp' % height, + 'format_id': f'{height}p', 'height': int(height), } if video_url.startswith('rtmp'): @@ -69,5 +67,5 @@ def decode_url(quoted_b64_url): 'id': video_id, 'title': title, 'description': description, - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index acf78e49a735..b1c230f35732 100644 --- 
a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -36,7 +36,7 @@ def _real_extract(self, url): raise ExtractorError('Received invalid JSON data') if info_raw.get('code'): raise ExtractorError( - 'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True) + 'Bigo says: {} (code {})'.format(info_raw.get('msg'), info_raw.get('code')), expected=True) info = info_raw.get('data') or {} if not info.get('alive'): diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py index eb289329d8bb..2ba63700c6cb 100644 --- a/yt_dlp/extractor/bild.py +++ b/yt_dlp/extractor/bild.py @@ -20,7 +20,7 @@ class BildIE(InfoExtractor): 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 196, - } + }, }, { 'note': 'static MP4 and HLS', 'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html', @@ -32,7 +32,7 @@ class BildIE(InfoExtractor): 'description': 'md5:709b543c24dc31bbbffee73bccda34ad', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 69, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index b38c90b1d104..411b48c282d7 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -112,7 +112,7 @@ def _get_subtitles(self, video_id, cid, aid=None): 'danmaku': [{ 'ext': 'xml', 'url': f'https://comment.bilibili.com/{cid}.xml', - }] + }], } subtitle_info = traverse_obj(self._download_json( @@ -126,7 +126,7 @@ def _get_subtitles(self, video_id, cid, aid=None): for s in subs_list: subtitles.setdefault(s['lan'], []).append({ 'ext': 'srt', - 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)) + 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)), }) return subtitles @@ -215,7 +215,7 @@ def _get_interactive_entries(self, video_id, cid, metainfo): yield { **metainfo, 'id': f'{video_id}_{cid}', - 
'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}', + 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}', 'formats': self.extract_formats(play_info), 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}', 'duration': float_or_none(play_info.get('timelength'), scale=1000), @@ -269,7 +269,7 @@ class BiliBiliIE(BilibiliBaseIE): 'url': 'https://www.bilibili.com/video/BV1bK411W797', 'info_dict': { 'id': 'BV1bK411W797', - 'title': '物语中的人物是如何吐槽自己的OP的' + 'title': '物语中的人物是如何吐槽自己的OP的', }, 'playlist_count': 18, 'playlist': [{ @@ -288,8 +288,8 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } - }] + }, + }], }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -308,7 +308,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } + }, }, { 'note': 'video has subtitles', 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', @@ -327,7 +327,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', - 'subtitles': 'count:2' + 'subtitles': 'count:2', }, 'params': {'listsubtitles': True}, }, { @@ -586,10 +586,9 @@ def _real_extract(self, url): is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate')) if is_interactive: return self.playlist_result( - self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{ - 'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), - '__post_extractor': self.extract_comments(aid), - }) + self._get_interactive_entries(video_id, cid, metainfo), **metainfo, + duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), + __post_extractor=self.extract_comments(aid)) else: return { **metainfo, @@ -640,7 +639,7 @@ 
class BiliBiliBangumiIE(BilibiliBaseIE): 'duration': 1425.256, 'timestamp': 1554566400, 'upload_date': '20190406', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, 'skip': 'Geo-restricted', }, { @@ -661,7 +660,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): 'duration': 1922.129, 'timestamp': 1602853860, 'upload_date': '20201016', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }] @@ -764,7 +763,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE): 'duration': 1525.777, 'timestamp': 1425074413, 'upload_date': '20150227', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], }] @@ -794,7 +793,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'title': '鬼灭之刃', 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b', }, - 'playlist_mincount': 26 + 'playlist_mincount': 26, }, { 'url': 'https://www.bilibili.com/bangumi/play/ss2251', 'info_dict': { @@ -819,7 +818,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'duration': 1436.992, 'timestamp': 1343185080, 'upload_date': '20120725', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], }] @@ -906,7 +905,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE): 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, - } + }, }] def _real_extract(self, url): @@ -939,7 +938,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE): 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, - } + }, }], 'params': {'playlist_items': '1'}, }, { @@ -1012,7 +1011,7 @@ def _extract_signature(self, playlist_id): for position in ( 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, - 
57, 62, 11, 36, 20, 34, 44, 52 + 57, 62, 11, 36, 20, 34, 44, 52, ): char_at_position = try_call(lambda: session_key[position]) if char_at_position: @@ -1163,7 +1162,7 @@ def get_metadata(page_data): 'uploader_id': ('meta', 'mid', {str_or_none}), 'timestamp': ('meta', 'ptime', {int_or_none}), 'thumbnail': ('meta', 'cover', {url_or_none}), - }) + }), } def get_entries(page_data): @@ -1195,7 +1194,7 @@ def _real_extract(self, url): mid, sid = self._match_valid_url(url).group('mid', 'sid') playlist_id = f'{mid}_{sid}' playlist_meta = traverse_obj(self._download_json( - f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False + f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False, ), { 'title': ('data', 'meta', 'name', {str}), 'description': ('data', 'meta', 'description', {str}), @@ -1217,7 +1216,7 @@ def get_metadata(page_data): 'page_count': math.ceil(entry_count / page_size), 'page_size': page_size, 'uploader': self._get_uploader(mid, playlist_id), - **playlist_meta + **playlist_meta, } def get_entries(page_data): @@ -1241,7 +1240,7 @@ class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE): 'upload_date': '20201109', 'modified_timestamp': int, 'modified_date': str, - 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg', 'view_count': int, 'like_count': int, }, @@ -1345,7 +1344,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'uploader_id': '84912', 'timestamp': 1604905176, 'upload_date': '20201109', - 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg', }, 'playlist_mincount': 22, }, { @@ -1371,7 +1370,7 @@ def _extract_medialist(self, query, list_id): for page_num in itertools.count(1): page_data = 
self._download_json( 'https://api.bilibili.com/x/v2/medialist/resource/list', - list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}' + list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}', )['data'] yield from self._get_entries(page_data, 'media_list', ending_key='bv_id') query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id')) @@ -1407,7 +1406,7 @@ def _real_extract(self, url): 'tid': ('tid', {int_or_none}), 'sort_field': ('sortFiled', {int_or_none}), 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}), - }) + }), } metadata = { 'id': f'{query["type"]}_{query["biz_id"]}', @@ -1430,26 +1429,26 @@ class BilibiliCategoryIE(InfoExtractor): 'url': 'https://www.bilibili.com/v/kichiku/mad', 'info_dict': { 'id': 'kichiku: mad', - 'title': 'kichiku: mad' + 'title': 'kichiku: mad', }, 'playlist_mincount': 45, 'params': { - 'playlistend': 45 - } + 'playlistend': 45, + }, }] def _fetch_page(self, api_url, num_pages, query, page_num): parsed_json = self._download_json( api_url, query, query={'Search_key': query, 'pn': page_num}, - note='Extracting results from page %s of %s' % (page_num, num_pages)) + note=f'Extracting results from page {page_num} of {num_pages}') video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list) if not video_list: - raise ExtractorError('Failed to retrieve video list for page %d' % page_num) + raise ExtractorError(f'Failed to retrieve video list for page {page_num}') for video in video_list: yield self.url_result( - 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid']) + 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid']) def _entries(self, category, subcategory, query): # map of categories : subcategories : RIDs @@ -1459,7 +1458,7 @@ def _entries(self, category, subcategory, query): 'manual_vocaloid': 126, 'guide': 22, 'theatre': 216, - 'course': 127 + 'course': 127, }, } @@ -1485,7 +1484,7 @@ def 
_entries(self, category, subcategory, query): def _real_extract(self, url): category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4] - query = '%s: %s' % (category, subcategory) + query = f'{category}: {subcategory}' return self.playlist_result(self._entries(category, subcategory, query), query, query) @@ -1588,7 +1587,7 @@ def _real_extract(self, url): formats = [{ 'url': play_data['cdns'][0], 'filesize': int_or_none(play_data.get('size')), - 'vcodec': 'none' + 'vcodec': 'none', }] for a_format in formats: @@ -1606,7 +1605,7 @@ def _real_extract(self, url): subtitles = { 'origin': [{ 'url': lyric, - }] + }], } return { @@ -1674,7 +1673,7 @@ class BiliBiliPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) return self.url_result( - 'http://www.bilibili.tv/video/av%s/' % video_id, + f'http://www.bilibili.tv/video/av{video_id}/', ie=BiliBiliIE.ie_key(), video_id=video_id) @@ -1702,11 +1701,10 @@ def _call_api(self, endpoint, *args, **kwargs): return json.get('data') def json2srt(self, json): - data = '\n\n'.join( + return '\n\n'.join( f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}' for i, line in enumerate(traverse_obj(json, ( 'body', lambda _, l: l['content'] and l['from'] and l['to'])))) - return data def _get_subtitles(self, *, ep_id=None, aid=None): sub_json = self._call_api( @@ -1808,14 +1806,14 @@ def _perform_login(self, username, password): note='Downloading login key', errnote='Unable to download login key')['data'] public_key = Cryptodome.RSA.importKey(key_data['key']) - password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) + password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode()) login_post = self._download_json( 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({ 'username': 
username, 'password': base64.b64encode(password_hash).decode('ascii'), 'keep_me': 'true', 's_locale': 'en_US', - 'isTrusted': 'true' + 'isTrusted': 'true', }), note='Logging in', errnote='Unable to log in') if login_post.get('code'): if login_post.get('message'): @@ -1842,17 +1840,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 76.242, - 'title': '' + 'title': '', }, { 'start_time': 76.242, 'end_time': 161.161, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1325.742, 'end_time': 1403.903, - 'title': 'Outro' + 'title': 'Outro', }], - } + }, }, { # Non-Bstation page 'url': 'https://www.bilibili.tv/en/play/1033760/11005006', @@ -1869,17 +1867,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 88.0, - 'title': '' + 'title': '', }, { 'start_time': 88.0, 'end_time': 156.0, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1173.0, 'end_time': 1259.535, - 'title': 'Outro' + 'title': 'Outro', }], - } + }, }, { # Subtitle with empty content 'url': 'https://www.bilibili.tv/en/play/1005144/10131790', @@ -1890,7 +1888,7 @@ class BiliIntlIE(BiliIntlBaseIE): 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 140, }, - 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.' 
+ 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.', }, { # episode comment extraction 'url': 'https://www.bilibili.tv/en/play/34580/340317', @@ -1908,20 +1906,20 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 61.0, - 'title': '' + 'title': '', }, { 'start_time': 61.0, 'end_time': 134.0, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1290.0, 'end_time': 1379.0, - 'title': 'Outro' + 'title': 'Outro', }], }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }, { # user generated content comment extraction 'url': 'https://www.bilibili.tv/en/video/2045730385', @@ -1936,8 +1934,8 @@ class BiliIntlIE(BiliIntlBaseIE): 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg', }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }, { # episode id without intro and outro 'url': 'https://www.bilibili.tv/en/play/1048837/11246489', @@ -1992,7 +1990,7 @@ def _extract_video_metadata(self, url, video_id, season_id): # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) video_data = traverse_obj(season_json, ( - 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id + 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id, ), expected_type=dict, get_all=False) # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found @@ -2024,7 +2022,7 @@ def _get_comments_reply(self, root_id, next_id=0, display_id=None): 'id': replies.get('rpid'), 'like_count': int_or_none(replies.get('like_count')), 'parent': replies.get('parent'), - 'timestamp': unified_timestamp(replies.get('ctime_text')) + 'timestamp': unified_timestamp(replies.get('ctime_text')), } if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')): @@ -2077,11 +2075,11 @@ def 
_real_extract(self, url): chapters = [{ 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000), 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000), - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000), 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000), - 'title': 'Outro' + 'title': 'Outro', }] return { @@ -2137,7 +2135,7 @@ def _entries(self, series_id): episode_id = str(episode['episode_id']) yield self.url_result(smuggle_url( BiliIntlIE._make_url(episode_id, series_id), - self._parse_video_metadata(episode) + self._parse_video_metadata(episode), ), BiliIntlIE, episode_id) def _real_extract(self, url): @@ -2156,19 +2154,19 @@ class BiliLiveIE(InfoExtractor): 'url': 'https://live.bilibili.com/196', 'info_dict': { 'id': '33989', - 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)", + 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)', 'ext': 'flv', - 'title': "太空狼人杀联动,不被爆杀就算赢", - 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg", + 'title': '太空狼人杀联动,不被爆杀就算赢', + 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg', 'timestamp': 1650802769, }, - 'skip': 'not live' + 'skip': 'not live', }, { 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://live.bilibili.com/blanc/196', - 'only_matching': True + 'only_matching': True, }] _FORMATS = { @@ -2209,7 +2207,7 @@ def _real_extract(self, url): raise ExtractorError('Streamer is not live', expected=True) formats = [] - for qn in 
self._FORMATS.keys(): + for qn in self._FORMATS: stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, { 'room_id': room_id, 'qn': qn, diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index 194bf1f4683a..c74f34c2a967 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -39,7 +39,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20170103', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'channel': 'BitChute', - 'channel_url': 'https://www.bitchute.com/channel/bitchute/' + 'channel_url': 'https://www.bitchute.com/channel/bitchute/', }, }, { # test case: video with different channel and uploader @@ -55,7 +55,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20231106', 'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/', 'channel': 'Full Measure with Sharyl Attkisson', - 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/' + 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/', }, }, { # video not downloadable in browser, but we can recover it @@ -72,7 +72,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20181113', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'channel': 'BitChute', - 'channel_url': 'https://www.bitchute.com/channel/bitchute/' + 'channel_url': 'https://www.bitchute.com/channel/bitchute/', }, 'params': {'check_formats': None}, }, { @@ -115,7 +115,7 @@ def _check_format(self, video_url, video_id): continue return { 'url': url, - 'filesize': int_or_none(response.headers.get('Content-Length')) + 'filesize': int_or_none(response.headers.get('Content-Length')), } def _raise_if_restricted(self, webpage): @@ -196,7 +196,7 @@ class BitChuteChannelIE(InfoExtractor): 'duration': 16, 'view_count': int, }, - } + }, ], 'params': { 'skip_download': True, @@ -209,7 +209,7 @@ class BitChuteChannelIE(InfoExtractor): 'id': 'wV9Imujxasw9', 'title': 'Bruce MacDonald and "The Light of Darkness"', 
'description': 'md5:747724ef404eebdfc04277714f81863e', - } + }, }] _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' @@ -224,7 +224,7 @@ class BitChuteChannelIE(InfoExtractor): 'container': 'playlist-video', 'title': 'title', 'description': 'description', - } + }, } diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 8f41c897adfb..535890979b07 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -47,7 +47,7 @@ def _real_extract(self, url): region = mobj.group('region') video_id = mobj.group('id') info = self._download_json( - 'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id) + f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id) duration = info.get('duration') title = info['name'] upload_date = info.get('created') diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index aa3d63ee7b34..71b237d4b2f9 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -44,7 +44,7 @@ class BleacherReportIE(InfoExtractor): def _real_extract(self, url): article_id = self._match_id(url) - article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article'] + article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article'] thumbnails = [] primary_photo = article_data.get('primaryPhoto') @@ -71,11 +71,11 @@ def _real_extract(self, url): if video: video_type = video['type'] if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): - info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] + info['url'] = 'http://bleacherreport.com/video_embed?id={}'.format(video['id']) elif video_type == 'youtube.com': info['url'] = video['id'] elif video_type == 'vine.co': - info['url'] = 
'https://vine.co/v/%s' % video['id'] + info['url'] = 'https://vine.co/v/{}'.format(video['id']) else: info['url'] = video_type + video['id'] return info @@ -99,12 +99,12 @@ class BleacherReportCMSIE(AMPIE): }, 'expected_warnings': [ - 'Unable to download f4m manifest' - ] + 'Unable to download f4m manifest', + ], }] def _real_extract(self, url): video_id = self._match_id(url) - info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) + info = self._extract_feed_info(f'http://vid.bleacherreport.com/videos/{video_id}.akamai') info['id'] = video_id return info diff --git a/yt_dlp/extractor/blerp.py b/yt_dlp/extractor/blerp.py index 4631ad2e971b..f4f22488e9f1 100644 --- a/yt_dlp/extractor/blerp.py +++ b/yt_dlp/extractor/blerp.py @@ -16,7 +16,7 @@ class BlerpIE(InfoExtractor): 'uploader_id': '5fb81e51aa66ae000c395478', 'ext': 'mp3', 'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'], - } + }, }, { 'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f', 'info_dict': { @@ -25,11 +25,11 @@ class BlerpIE(InfoExtractor): 'uploader': '179617322678353920', 'uploader_id': '5ba99cf71386730004552c42', 'ext': 'mp3', - 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'] - } + 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'], + }, }] - _GRAPHQL_OPERATIONNAME = "webBitePageGetBite" + _GRAPHQL_OPERATIONNAME = 'webBitePageGetBite' _GRAPHQL_QUERY = ( '''query webBitePageGetBite($_id: MongoID!) 
{ web { @@ -141,27 +141,26 @@ def _real_extract(self, url): 'operationName': self._GRAPHQL_OPERATIONNAME, 'query': self._GRAPHQL_QUERY, 'variables': { - '_id': audio_id - } + '_id': audio_id, + }, } headers = { - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', } - json_result = self._download_json('https://api.blerp.com/graphql', - audio_id, data=json.dumps(data).encode('utf-8'), headers=headers) + json_result = self._download_json( + 'https://api.blerp.com/graphql', audio_id, + data=json.dumps(data).encode(), headers=headers) bite_json = json_result['data']['web']['biteById'] - info_dict = { + return { 'id': bite_json['_id'], 'url': bite_json['audio']['mp3']['url'], 'title': bite_json['title'], 'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none), 'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none), 'ext': 'mp3', - 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None) + 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None), } - - return info_dict diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index ef0151de672f..1614b6f947b2 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -21,14 +21,14 @@ class BloggerIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': r're:^https?://.*', 'duration': 76.068, - } + }, }] def _real_extract(self, url): token_id = self._match_id(url) webpage = self._download_webpage(url, token_id) data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data') - data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id) + data = self._parse_json(data_json.encode().decode('unicode_escape'), token_id) streams = data['streams'] formats = [{ 'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 
0))), diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py index 792155e51ac3..ec6b7a86eb1f 100644 --- a/yt_dlp/extractor/bloomberg.py +++ b/yt_dlp/extractor/bloomberg.py @@ -55,7 +55,7 @@ def _real_extract(self, url): title = re.sub(': Video$', '', self._og_search_title(webpage)) embed_info = self._download_json( - 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id) + f'http://www.bloomberg.com/multimedia/api/embed?id={video_id}', video_id) formats = [] for stream in embed_info['streams']: stream_url = stream.get('url') diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py index ca326f25fad5..5fe937a6acba 100644 --- a/yt_dlp/extractor/bokecc.py +++ b/yt_dlp/extractor/bokecc.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_parse_qs from ..utils import ExtractorError @@ -9,20 +10,18 @@ def _extract_bokecc_formats(self, webpage, video_id, format_id=None): r'<(?:script|embed)[^>]+src=(?P["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P.+?)(?P=q)', webpage, 'player params', group='query') - player_params = compat_parse_qs(player_params_str) + player_params = urllib.parse.parse_qs(player_params_str) info_xml = self._download_xml( - 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % ( + 'http://p.bokecc.com/servlet/playinfo?uid={}&vid={}&m=1'.format( player_params['siteid'][0], player_params['vid'][0]), video_id) - formats = [{ + return [{ 'format_id': format_id, 'url': quality.find('./copy').attrib['playurl'], 'quality': int(quality.attrib['value']), } for quality in info_xml.findall('./video/quality')] - return formats - class BokeCCIE(BokeCCBaseIE): _IE_DESC = 'CC视频' @@ -38,11 +37,11 @@ class BokeCCIE(BokeCCBaseIE): }] def _real_extract(self, url): - qs = compat_parse_qs(self._match_valid_url(url).group('query')) + qs = urllib.parse.parse_qs(self._match_valid_url(url).group('query')) if not qs.get('vid') or not qs.get('uid'): 
raise ExtractorError('Invalid URL', expected=True) - video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0]) + video_id = '{}_{}'.format(qs['uid'][0], qs['vid'][0]) webpage = self._download_webpage(url, video_id) diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py index bf955668dfb0..ab85477de45c 100644 --- a/yt_dlp/extractor/bongacams.py +++ b/yt_dlp/extractor/bongacams.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, try_get, @@ -38,7 +37,7 @@ def _real_extract(self, url): channel_id = mobj.group('id') amf = self._download_json( - 'https://%s/tools/amf.php' % host, channel_id, + f'https://{host}/tools/amf.php', channel_id, data=urlencode_postdata(( ('method', 'getRoomData'), ('args[]', channel_id), @@ -48,14 +47,14 @@ def _real_extract(self, url): server_url = amf['localData']['videoServerUrl'] uploader_id = try_get( - amf, lambda x: x['performerData']['username'], compat_str) or channel_id + amf, lambda x: x['performerData']['username'], str) or channel_id uploader = try_get( - amf, lambda x: x['performerData']['displayName'], compat_str) + amf, lambda x: x['performerData']['displayName'], str) like_count = int_or_none(try_get( amf, lambda x: x['performerData']['loversCount'])) formats = self._extract_m3u8_formats( - '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id), + f'{server_url}/hls/stream_{uploader_id}/playlist.m3u8', channel_id, 'mp4', m3u8_id='hls', live=True) return { diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 267586687258..f5b819678865 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -57,8 +57,7 @@ def _real_extract(self, url): if video_id and account_id and player_id and embed: entries.append( - 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' - % (account_id, player_id, embed, video_id)) + 
f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}') if len(entries) == 0: return self.url_result(url, 'Generic') diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 008c011cc8aa..3547ad9973dc 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -72,20 +72,20 @@ def _real_extract(self, url): 'BoxApi': 'shared_link=' + shared_link, 'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats }, query={ - 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size' + 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size', }) title = f['name'] query = { 'access_token': access_token, - 'shared_link': shared_link + 'shared_link': shared_link, } formats = [] for url_tmpl in traverse_obj(f, ( 'representations', 'entries', lambda _, v: v['representation'] == 'dash', - 'content', 'url_template', {url_or_none} + 'content', 'url_template', {url_or_none}, )): manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query) fmts = self._extract_mpd_formats(manifest_url, file_id) diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py index da06cc3f860f..efa66994aaad 100644 --- a/yt_dlp/extractor/boxcast.py +++ b/yt_dlp/extractor/boxcast.py @@ -21,7 +21,7 @@ class BoxCastVideoIE(InfoExtractor): 'release_date': '20221210', 'uploader_id': 're8w0v8hohhvpqtbskpe', 'uploader': 'Children\'s Health Defense', - } + }, }, { 'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad', 'info_dict': { @@ -30,8 +30,8 @@ class BoxCastVideoIE(InfoExtractor): 'uploader_id': 'vctwevwntun3o0ikq7af', 'uploader': 'Legacy Christian Church', 'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools', - 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg' - } + 'thumbnail': 
r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg', + }, }, { 'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev', 'info_dict': { @@ -44,7 +44,7 @@ class BoxCastVideoIE(InfoExtractor): 'uploader': 'Lighthouse Ministries International - Beltsville, Maryland', 'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340', 'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022', - } + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://childrenshealthdefense.eu/live-stream/', @@ -57,7 +57,7 @@ class BoxCastVideoIE(InfoExtractor): 'release_date': '20221210', 'uploader_id': 're8w0v8hohhvpqtbskpe', 'uploader': 'Children\'s Health Defense', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index 6e1c63e2bb60..0568e06f68fa 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -61,7 +61,7 @@ class BRIE(InfoExtractor): 'title': 'Umweltbewusster Häuslebauer', 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2', 'duration': 116, - } + }, }, { 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html', @@ -74,7 +74,7 @@ class BRIE(InfoExtractor): 'duration': 893, 'uploader': 'Eva Maria Steimle', 'upload_date': '20170208', - } + }, }, ] @@ -142,7 +142,7 @@ def _extract_formats(self, assets, media_id): http_format_info = format_info.copy() http_format_info.update({ 'url': format_url, - 'format_id': 'http-%s' % asset_type, + 'format_id': f'http-{asset_type}', }) formats.append(http_format_info) server_prefix = xpath_text(asset, 'serverPrefix') @@ -151,7 +151,7 @@ def _extract_formats(self, assets, media_id): rtmp_format_info.update({ 'url': server_prefix, 'play_path': xpath_text(asset, 'fileName'), - 'format_id': 'rtmp-%s' % asset_type, + 'format_id': f'rtmp-{asset_type}', }) formats.append(rtmp_format_info) return formats diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py index 
04b1dd80c838..df10299a0c04 100644 --- a/yt_dlp/extractor/brainpop.py +++ b/yt_dlp/extractor/brainpop.py @@ -52,8 +52,8 @@ def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', ex '%s': {}, 'ad_%s': { 'format_note': 'Audio description', - 'source_preference': -2 - } + 'source_preference': -2, + }, } for additional_key_format, additional_key_fields in additional_key_formats.items(): for key_quality, key_index in enumerate(('high', 'low')): @@ -62,7 +62,7 @@ def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', ex formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, { 'quality': -1 - key_quality, **additional_key_fields, - **extra_fields + **extra_fields, })) return formats @@ -72,7 +72,7 @@ def _perform_login(self, username, password): data=json.dumps({'username': username, 'password': password}).encode(), headers={ 'Content-Type': 'application/json', - 'Referer': self._ORIGIN + 'Referer': self._ORIGIN, }, note='Logging in', errnote='Unable to log in', expected_status=400) status_code = int_or_none(login_res['status_code']) if status_code != 1505: @@ -131,12 +131,12 @@ def _real_extract(self, url): formats, subtitles = [], {} formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', { 'language': movie_feature.get('language') or 'en', - 'language_preference': 10 + 'language_preference': 10, })) for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items(): formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', { 'language': lang, - 'language_preference': -10 + 'language_preference': -10, })) # TODO: Do localization fields also have subtitles? 
@@ -145,7 +145,7 @@ def _real_extract(self, url): r'^subtitles_(?P\w+)$', name, 'subtitle metadata', default=None) if lang and url: subtitles.setdefault(lang, []).append({ - 'url': urljoin(self._CDN_URL, url) + 'url': urljoin(self._CDN_URL, url), }) return { diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py index 419fe8c9c8a3..ec72f0d8845d 100644 --- a/yt_dlp/extractor/bravotv.py +++ b/yt_dlp/extractor/bravotv.py @@ -185,5 +185,5 @@ def _real_extract(self, url): 'episode_number': ('episodeNumber', {int_or_none}), 'episode': 'episodeTitle', 'series': 'show', - })) + })), } diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py index b5abb7f19471..fedf4772a974 100644 --- a/yt_dlp/extractor/breitbart.py +++ b/yt_dlp/extractor/breitbart.py @@ -13,7 +13,7 @@ class BreitBartIE(InfoExtractor): 'description': 'md5:bac35eb0256d1cb17f517f54c79404d5', 'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg', 'age_limit': 0, - } + }, }, { 'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/', 'only_matching': True, @@ -30,5 +30,5 @@ def _real_extract(self, url): 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), 'age_limit': self._rta_search(webpage), - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 4190e1a0992c..dc0c83572a2f 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -1,15 +1,12 @@ import base64 import re import struct +import urllib.parse import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_parse_qs, - compat_urlparse, -) +from ..compat import compat_etree_fromstring from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -142,7 +139,7 @@ class BrightcoveLegacyIE(InfoExtractor): # from 
http://www.un.org/chinese/News/story.asp?NewsID=27724 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350', 'only_matching': True, # Tested in GenericIE - } + }, ] _WEBPAGE_TESTS = [{ @@ -315,7 +312,7 @@ def _build_brightcove_url(cls, object_str): object_str = fix_xml_ampersands(object_str) try: - object_doc = compat_etree_fromstring(object_str.encode('utf-8')) + object_doc = compat_etree_fromstring(object_str.encode()) except xml.etree.ElementTree.ParseError: return @@ -323,7 +320,7 @@ def _build_brightcove_url(cls, object_str): if fv_el is not None: flashvars = dict( (k, v[0]) - for k, v in compat_parse_qs(fv_el.attrib['value']).items()) + for k, v in urllib.parse.parse_qs(fv_el.attrib['value']).items()) else: flashvars = {} @@ -340,32 +337,32 @@ def find_param(name): params = {} - playerID = find_param('playerID') or find_param('playerId') - if playerID is None: + player_id = find_param('playerID') or find_param('playerId') + if player_id is None: raise ExtractorError('Cannot find player ID') - params['playerID'] = playerID + params['playerID'] = player_id - playerKey = find_param('playerKey') + player_key = find_param('playerKey') # Not all pages define this value - if playerKey is not None: - params['playerKey'] = playerKey + if player_key is not None: + params['playerKey'] = player_key # These fields hold the id of the video - videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') - if videoPlayer is not None: - if isinstance(videoPlayer, list): - videoPlayer = videoPlayer[0] - videoPlayer = videoPlayer.strip() + video_player = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') + if video_player is not None: + if isinstance(video_player, list): + video_player = video_player[0] + video_player = video_player.strip() # UUID is also possible for 
videoPlayer (e.g. # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd # or http://www8.hp.com/cn/zh/home.html) if not (re.match( r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', - videoPlayer) or videoPlayer.startswith('ref:')): + video_player) or video_player.startswith('ref:')): return None - params['@videoPlayer'] = videoPlayer - linkBase = find_param('linkBaseURL') - if linkBase is not None: - params['linkBaseURL'] = linkBase + params['@videoPlayer'] = video_player + link_base = find_param('linkBaseURL') + if link_base is not None: + params['linkBaseURL'] = link_base return cls._make_brightcove_url(params) @classmethod @@ -448,13 +445,13 @@ def _real_extract(self, url): url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) mobj = self._match_valid_url(url) query_str = mobj.group('query') - query = compat_urlparse.parse_qs(query_str) + query = urllib.parse.parse_qs(query_str) - videoPlayer = query.get('@videoPlayer') - if videoPlayer: + video_player = query.get('@videoPlayer') + if video_player: # We set the original url as the default 'Referer' header referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url) - video_id = videoPlayer[0] + video_id = video_player[0] if 'playerID' not in query: mobj = re.search(r'/bcpid(\d+)', url) if mobj is not None: @@ -483,7 +480,7 @@ def _real_extract(self, url): enc_pub_id = player_key.split(',')[1].replace('~', '=') publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] if publisher_id: - brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + brightcove_new_url = f'http://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}' if referer: brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer}) return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) @@ -543,9 +540,9 @@ def 
_parse_brightcove_metadata(self, json_data, video_id, headers={}): def build_format_id(kind): format_id = kind if tbr: - format_id += '-%dk' % int(tbr) + format_id += f'-{int(tbr)}k' if height: - format_id += '-%dp' % height + format_id += f'-{height}p' return format_id if src or streaming_src: @@ -654,7 +651,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # playlist stream 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', @@ -666,7 +663,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', 'only_matching': True, @@ -833,8 +830,7 @@ def _extract_brightcove_urls(ie, webpage): player_id = player_id or attrs.get('data-player') or 'default' embed = embed or attrs.get('data-embed') or 'default' - bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % ( - account_id, player_id, embed, video_id) + bc_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}' # Some brightcove videos may be embedded with video tag only and # without script tag or any mentioning of brightcove at all. 
Such @@ -865,13 +861,13 @@ def _real_extract(self, url): account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups() - policy_key_id = '%s_%s' % (account_id, player_id) + policy_key_id = f'{account_id}_{player_id}' policy_key = self.cache.load('brightcove', policy_key_id) policy_key_extracted = False store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x) def extract_policy_key(): - base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed) + base_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/' config = self._download_json( base_url + 'config.json', video_id, fatal=False) or {} policy_key = try_get( @@ -910,7 +906,7 @@ def extract_policy_key(): if not policy_key: policy_key = extract_policy_key() policy_key_extracted = True - headers['Accept'] = 'application/json;pk=%s' % policy_key + headers['Accept'] = f'application/json;pk={policy_key}' try: json_data = self._download_json(api_url, video_id, headers=headers) break @@ -936,7 +932,7 @@ def extract_policy_key(): custom_fields['bcadobepassresourceid']) json_data = self._download_json( api_url, video_id, headers={ - 'Accept': 'application/json;pk=%s' % policy_key + 'Accept': f'application/json;pk={policy_key}', }, query={ 'tveToken': tve_token, }) diff --git a/yt_dlp/extractor/bundesliga.py b/yt_dlp/extractor/bundesliga.py index e76dd58ddbb3..29f8f941576c 100644 --- a/yt_dlp/extractor/bundesliga.py +++ b/yt_dlp/extractor/bundesliga.py @@ -16,17 +16,17 @@ class BundesligaIE(InfoExtractor): 'upload_date': '20220928', 'duration': 146, 'timestamp': 1664366511, - 'description': 'md5:803d4411bd134140c774021dd4b7598b' - } + 'description': 'md5:803d4411bd134140c774021dd4b7598b', + }, }, { 'url': 'https://www.bundesliga.com/en/bundesliga/videos/latest-features/T8IKc8TX?vid=ROHjs06G', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.bundesliga.com/en/bundesliga/videos/goals?vid=mOG56vWA', - 'only_matching': 
True - } + 'only_matching': True, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/businessinsider.py b/yt_dlp/extractor/businessinsider.py index 4b3f5e68b85c..7cb9af692a7c 100644 --- a/yt_dlp/extractor/businessinsider.py +++ b/yt_dlp/extractor/businessinsider.py @@ -10,7 +10,7 @@ class BusinessInsiderIE(InfoExtractor): 'info_dict': { 'id': 'cjGDb0X9', 'ext': 'mp4', - 'title': "Bananas give you more radiation exposure than living next to a nuclear power plant", + 'title': 'Bananas give you more radiation exposure than living next to a nuclear power plant', 'description': 'md5:0175a3baf200dd8fa658f94cade841b3', 'upload_date': '20160611', 'timestamp': 1465675620, @@ -41,5 +41,5 @@ def _real_extract(self, url): r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'), webpage, 'jwplatform id') return self.url_result( - 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), + f'jwplatform:{jwplatform_id}', ie=JWPlatformIE.ie_key(), video_id=video_id) diff --git a/yt_dlp/extractor/buzzfeed.py b/yt_dlp/extractor/buzzfeed.py index b30a3b7ae253..9847095bcf4d 100644 --- a/yt_dlp/extractor/buzzfeed.py +++ b/yt_dlp/extractor/buzzfeed.py @@ -23,8 +23,8 @@ class BuzzFeedIE(InfoExtractor): 'upload_date': '20141024', 'uploader_id': 'Buddhanz1', 'uploader': 'Angry Ram', - } - }] + }, + }], }, { 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia', 'params': { @@ -45,7 +45,7 @@ class BuzzFeedIE(InfoExtractor): 'uploader_id': 'CindysMunchkin', 'uploader': 're:^Munchkin the', }, - }] + }], }, { 'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK', 'info_dict': { diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py index ad35427ed750..e9796f7dabf4 100644 --- a/yt_dlp/extractor/byutv.py +++ b/yt_dlp/extractor/byutv.py @@ -36,7 +36,7 @@ class BYUtvIE(InfoExtractor): 'duration': 11645, }, 'params': { - 'skip_download': True + 'skip_download': True, }, }, { 'url': 
'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', diff --git a/yt_dlp/extractor/c56.py b/yt_dlp/extractor/c56.py index e4b1c9a84c30..6264803dd683 100644 --- a/yt_dlp/extractor/c56.py +++ b/yt_dlp/extractor/c56.py @@ -38,7 +38,7 @@ def _real_extract(self, url): return self.url_result(sohu_video_info['url'], 'Sohu') page = self._download_json( - 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info') + f'http://vxml.56.com/json/{text_id}/', text_id, 'Downloading video info') info = page['info'] @@ -46,7 +46,7 @@ def _real_extract(self, url): { 'format_id': f['type'], 'filesize': int(f['filesize']), - 'url': f['url'] + 'url': f['url'], } for f in info['rfiles'] ] diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py index c77179c7bbec..b7061a7d141e 100644 --- a/yt_dlp/extractor/callin.py +++ b/yt_dlp/extractor/callin.py @@ -29,8 +29,8 @@ class CallinIE(InfoExtractor): 'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553', 'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions', 'episode_number': 1, - 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd' - } + 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd', + }, }, { 'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW', 'md5': '14ede27ee2c957b7e4db93140fc0745c', @@ -54,7 +54,7 @@ class CallinIE(InfoExtractor): 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png', 'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5', 'timestamp': 1662100688.005, - } + }, }, { 'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA', 'md5': '16f704ddbf82a27e3930533b12062f07', @@ -78,7 +78,7 @@ class CallinIE(InfoExtractor): 'thumbnail': 
'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png', 'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c', 'timestamp': 1661476708.282, - } + }, }] def try_get_user_name(self, d): @@ -94,7 +94,7 @@ def _real_extract(self, url): next_data = self._search_nextjs_data(webpage, display_id) episode = next_data['props']['pageProps']['episode'] - id = episode['id'] + video_id = episode['id'] title = episode.get('title') or self._generic_title('', webpage) url = episode['m3u8'] formats = self._extract_m3u8_formats(url, display_id, ext='ts') @@ -125,11 +125,11 @@ def _real_extract(self, url): episode_list = traverse_obj(show_json, ('pageProps', 'show', 'episodes')) or [] episode_number = next( - (len(episode_list) - i for (i, e) in enumerate(episode_list) if e.get('id') == id), + (len(episode_list) - i for i, e in enumerate(episode_list) if e.get('id') == video_id), None) return { - 'id': id, + 'id': video_id, '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])], 'display_id': display_id, 'title': title, @@ -151,5 +151,5 @@ def _real_extract(self, url): 'series_id': show_id, 'episode': title, 'episode_number': episode_number, - 'episode_id': id + 'episode_id': video_id, } diff --git a/yt_dlp/extractor/caltrans.py b/yt_dlp/extractor/caltrans.py index f4a4a834b8ec..5513bb2dfa68 100644 --- a/yt_dlp/extractor/caltrans.py +++ b/yt_dlp/extractor/caltrans.py @@ -11,7 +11,7 @@ class CaltransIE(InfoExtractor): 'title': 'US-50 : Sacramento : Hwy 50 at 24th', 'live_status': 'is_live', 'thumbnail': 'https://cwwp2.dot.ca.gov/data/d3/cctv/image/hwy50at24th/hwy50at24th.jpg', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py index 2650cc1ef135..0d0dccb7940c 100644 --- a/yt_dlp/extractor/cam4.py +++ b/yt_dlp/extractor/cam4.py @@ -12,12 +12,12 @@ class CAM4IE(InfoExtractor): 'age_limit': 18, 'live_status': 'is_live', 
'thumbnail': 'https://snapshots.xcdnpro.com/thumbnails/foxynesss', - } + }, } def _real_extract(self, url): channel_id = self._match_id(url) - m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), channel_id).get('cdnURL') + m3u8_playlist = self._download_json(f'https://www.cam4.com/rest/v1.0/profile/{channel_id}/streamInfo', channel_id).get('cdnURL') formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True) diff --git a/yt_dlp/extractor/camdemy.py b/yt_dlp/extractor/camdemy.py index c7079e4224a5..34dc095af86b 100644 --- a/yt_dlp/extractor/camdemy.py +++ b/yt_dlp/extractor/camdemy.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) from ..utils import ( clean_html, parse_duration, @@ -28,7 +25,7 @@ class CamdemyIE(InfoExtractor): 'duration': 1591, 'upload_date': '20130114', 'view_count': int, - } + }, }, { # With non-empty description # webpage returns "No permission or not login" @@ -42,7 +39,7 @@ class CamdemyIE(InfoExtractor): 'description': 'md5:2a9f989c2b153a2342acee579c6e7db6', 'creator': 'evercam', 'duration': 318, - } + }, }, { # External source (YouTube) 'url': 'http://www.camdemy.com/media/14842', @@ -76,12 +73,12 @@ def _real_extract(self, url): title = oembed_obj['title'] thumb_url = oembed_obj['thumbnail_url'] - video_folder = compat_urlparse.urljoin(thumb_url, 'video/') + video_folder = urllib.parse.urljoin(thumb_url, 'video/') file_list_doc = self._download_xml( - compat_urlparse.urljoin(video_folder, 'fileList.xml'), + urllib.parse.urljoin(video_folder, 'fileList.xml'), video_id, 'Downloading filelist XML') file_name = file_list_doc.find('./video/item/fileName').text - video_url = compat_urlparse.urljoin(video_folder, file_name) + video_url = urllib.parse.urljoin(video_folder, file_name) # Some URLs return "No permission or not login" in a 
webpage despite being # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885) @@ -117,35 +114,35 @@ class CamdemyFolderIE(InfoExtractor): 'id': '450', 'title': '信號與系統 2012 & 2011 (Signals and Systems)', }, - 'playlist_mincount': 145 + 'playlist_mincount': 145, }, { # links without trailing slash # and multi-page 'url': 'http://www.camdemy.com/folder/853', 'info_dict': { 'id': '853', - 'title': '科學計算 - 使用 Matlab' + 'title': '科學計算 - 使用 Matlab', }, - 'playlist_mincount': 20 + 'playlist_mincount': 20, }, { # with displayMode parameter. For testing the codes to add parameters 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', 'info_dict': { 'id': '853', - 'title': '科學計算 - 使用 Matlab' + 'title': '科學計算 - 使用 Matlab', }, - 'playlist_mincount': 20 + 'playlist_mincount': 20, }] def _real_extract(self, url): folder_id = self._match_id(url) # Add displayMode=list so that all links are displayed in a single page - parsed_url = list(compat_urlparse.urlparse(url)) - query = dict(compat_urlparse.parse_qsl(parsed_url[4])) + parsed_url = list(urllib.parse.urlparse(url)) + query = dict(urllib.parse.parse_qsl(parsed_url[4])) query.update({'displayMode': 'list'}) - parsed_url[4] = compat_urllib_parse_urlencode(query) - final_url = compat_urlparse.urlunparse(parsed_url) + parsed_url[4] = urllib.parse.urlencode(query) + final_url = urllib.parse.urlunparse(parsed_url) page = self._download_webpage(final_url, folder_id) matches = re.findall(r"href='(/media/\d+/?)'", page) diff --git a/yt_dlp/extractor/camfm.py b/yt_dlp/extractor/camfm.py index 11dafa4a21d2..6036f136fd2a 100644 --- a/yt_dlp/extractor/camfm.py +++ b/yt_dlp/extractor/camfm.py @@ -37,7 +37,7 @@ def _real_extract(self, url): 'thumbnail': urljoin('https://camfm.co.uk', self._search_regex( r']+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)), 'title': self._html_search_regex('

([^<]+)

', page, 'title', fatal=False), - 'description': clean_html(get_element_by_class('small-12 medium-8 cell', page)) + 'description': clean_html(get_element_by_class('small-12 medium-8 cell', page)), } @@ -56,7 +56,7 @@ class CamFMEpisodeIE(InfoExtractor): 'series': 'AITAA: Am I the Agony Aunt?', 'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1', 'categories': ['Entertainment'], - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/cammodels.py b/yt_dlp/extractor/cammodels.py index 135b31529f54..7388cfb6cd9b 100644 --- a/yt_dlp/extractor/cammodels.py +++ b/yt_dlp/extractor/cammodels.py @@ -7,14 +7,14 @@ class CamModelsIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.cammodels.com/cam/AutumnKnight/', 'only_matching': True, - 'age_limit': 18 + 'age_limit': 18, }] def _real_extract(self, url): user_id = self._match_id(url) manifest = self._download_json( - 'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id) + f'https://manifest-server.naiadsystems.com/live/s:{user_id}.json', user_id) formats = [] thumbnails = [] @@ -36,7 +36,7 @@ def _real_extract(self, url): format_id_list = [format_id] height = int_or_none(media.get('videoHeight')) if height is not None: - format_id_list.append('%dp' % height) + format_id_list.append(f'{height}p') f = { 'url': media_url, 'format_id': '-'.join(format_id_list), @@ -73,5 +73,5 @@ def _real_extract(self, url): 'thumbnails': thumbnails, 'is_live': True, 'formats': formats, - 'age_limit': 18 + 'age_limit': 18, } diff --git a/yt_dlp/extractor/camtasia.py b/yt_dlp/extractor/camtasia.py index 70ab6c62a1b2..326643175b50 100644 --- a/yt_dlp/extractor/camtasia.py +++ b/yt_dlp/extractor/camtasia.py @@ -17,7 +17,7 @@ class CamtasiaEmbedIE(InfoExtractor): 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1', 'ext': 'flv', 'duration': 2235.90, - } + }, }, { 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63', 'info_dict': { @@ -25,12 +25,12 @@ class CamtasiaEmbedIE(InfoExtractor): 'title': 
'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip', 'ext': 'flv', 'duration': 2235.93, - } + }, }], 'info_dict': { 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', }, - 'skip': 'webpage dead' + 'skip': 'webpage dead', }, ] diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index 745e6954c7ad..3a0df95450ef 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -21,7 +21,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20211028', 'duration': 1125, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/le-journal/topic/24512/la-poste-fait-de-neuchatel-un-pole-cryptographique', 'info_dict': { @@ -33,7 +33,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20211028', 'duration': 138, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/eureka/episode/24484/ces-innovations-qui-veulent-rendre-lagriculture-plus-durable', 'info_dict': { @@ -45,7 +45,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20211026', 'duration': 360, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/avec-le-temps/episode/23516/redonner-de-leclat-grace-au-polissage', 'info_dict': { @@ -57,7 +57,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20210726', 'duration': 360, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura', 'info_dict': { diff --git a/yt_dlp/extractor/canalc2.py b/yt_dlp/extractor/canalc2.py index 597cb2a6b0ec..c725545fa2f0 100644 --- a/yt_dlp/extractor/canalc2.py +++ b/yt_dlp/extractor/canalc2.py @@ -26,7 +26,7 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.canalc2.tv/video/%s' % video_id, video_id) + 
f'http://www.canalc2.tv/video/{video_id}', video_id) title = self._html_search_regex( r'(?s)class="[^"]*col_description[^"]*">.*?

(.+?)

', diff --git a/yt_dlp/extractor/canalplus.py b/yt_dlp/extractor/canalplus.py index 3ff5c3fbfcbe..728b7a0472bb 100644 --- a/yt_dlp/extractor/canalplus.py +++ b/yt_dlp/extractor/canalplus.py @@ -53,7 +53,7 @@ def _real_extract(self, url): video_data = self._download_json(info_url, video_id, 'Downloading video JSON') if isinstance(video_data, list): - video_data = [video for video in video_data if video.get('ID') == video_id][0] + video_data = next(video for video in video_data if video.get('ID') == video_id) media = video_data['MEDIA'] infos = video_data['INFOS'] @@ -97,8 +97,7 @@ def _real_extract(self, url): return { 'id': video_id, 'display_id': display_id, - 'title': '%s - %s' % (titrage['TITRE'], - titrage['SOUS_TITRE']), + 'title': '{} - {}'.format(titrage['TITRE'], titrage['SOUS_TITRE']), 'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')), 'thumbnails': thumbnails, 'description': infos.get('DESCRIPTION'), diff --git a/yt_dlp/extractor/caracoltv.py b/yt_dlp/extractor/caracoltv.py index 79f7752fe07b..493ffdae5e82 100644 --- a/yt_dlp/extractor/caracoltv.py +++ b/yt_dlp/extractor/caracoltv.py @@ -78,13 +78,13 @@ def _perform_login(self, email, password): 'device_data': { 'device_id': str(uuid.uuid4()), 'device_token': '', - 'device_type': 'web' + 'device_type': 'web', }, 'login_data': { 'enabled': True, 'email': email, 'password': password, - } + }, }).encode())['user_token'] def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None): diff --git a/yt_dlp/extractor/cartoonnetwork.py b/yt_dlp/extractor/cartoonnetwork.py index 4dd7ac46d45b..1749a008a270 100644 --- a/yt_dlp/extractor/cartoonnetwork.py +++ b/yt_dlp/extractor/cartoonnetwork.py @@ -27,7 +27,7 @@ def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False): if content_re: metadata_re = r'|video_metadata\.content_' + content_re return self._search_regex( - r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, 
value_re), + rf'(?:_cnglobal\.currentVideo\.{global_re}{metadata_re})\s*=\s*"({value_re})";', webpage, name, fatal=fatal) media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index a4180262b78d..740e12926483 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -6,9 +6,6 @@ import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( ExtractorError, int_or_none, @@ -99,7 +96,7 @@ class CBCIE(InfoExtractor): # multiple CBC.APP.Caffeine.initInstance(...) 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', 'info_dict': { - 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME + 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME: actual title includes " | CBC News" 'id': 'dog-indoor-exercise-winter-1.3928238', 'description': 'md5:c18552e41726ee95bd75210d1ca9194c', }, @@ -108,7 +105,7 @@ class CBCIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) + return False if CBCPlayerIE.suitable(url) else super().suitable(url) def _extract_player_init(self, player_init, display_id): player_info = self._parse_json(player_init, display_id, js_to_json) @@ -116,15 +113,15 @@ def _extract_player_init(self, player_init, display_id): if not media_id: clip_id = player_info['clipId'] feed = self._download_json( - 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, + f'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={{:mpsReleases}}{{{clip_id}}}', clip_id, fatal=False) if feed: - media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) + media_id = try_get(feed, lambda x: x['entries'][0]['guid'], str) if not media_id: media_id = 
self._download_json( 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, clip_id)['entries'][0]['id'].split('/')[-1] - return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + return self.url_result(f'cbcplayer:{media_id}', 'CBCPlayer', media_id) def _real_extract(self, url): display_id = self._match_id(url) @@ -142,7 +139,7 @@ def _real_extract(self, url): r'guid["\']\s*:\s*["\'](\d+)'): media_ids.extend(re.findall(media_id_re, webpage)) entries.extend([ - self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + self.url_result(f'cbcplayer:{media_id}', 'CBCPlayer', media_id) for media_id in orderedSet(media_ids)]) return self.playlist_result( entries, display_id, strip_or_none(title), @@ -322,11 +319,11 @@ def _real_extract(self, url): '_type': 'url_transparent', 'ie_key': 'ThePlatform', 'url': smuggle_url( - 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, { - 'force_smil_url': True + f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', { + 'force_smil_url': True, }), 'id': video_id, - '_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS + '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS } @@ -338,13 +335,13 @@ class CBCPlayerPlaylistIE(InfoExtractor): 'playlist_mincount': 25, 'info_dict': { 'id': 'news/tv shows/the national/latest broadcast', - } + }, }, { 'url': 'https://www.cbc.ca/player/news/Canada/North', 'playlist_mincount': 25, 'info_dict': { 'id': 'news/canada/north', - } + }, }] def _real_extract(self, url): @@ -355,7 +352,7 @@ def _real_extract(self, url): def entries(): for video_id in traverse_obj(json_content, ( - 'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id' + 'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id', )): yield 
self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE) @@ -453,7 +450,7 @@ def _get_claims_token_expiry(self): # JWT is decoded here and 'exp' field is extracted # It is a Unix timestamp for when the token expires b64_data = self._claims_token.split('.')[1] - data = base64.urlsafe_b64decode(b64_data + "==") + data = base64.urlsafe_b64decode(b64_data + '==') return json.loads(data)['exp'] def claims_token_expired(self): @@ -535,17 +532,17 @@ def _real_extract(self, url): self._remove_duplicate_formats(formats) formats.extend(self._find_secret_formats(formats, video_id)) - for format in formats: - if format.get('vcodec') == 'none': - if format.get('ext') is None: - format['ext'] = 'm4a' - if format.get('acodec') is None: - format['acodec'] = 'mp4a.40.2' + for fmt in formats: + if fmt.get('vcodec') == 'none': + if fmt.get('ext') is None: + fmt['ext'] = 'm4a' + if fmt.get('acodec') is None: + fmt['acodec'] = 'mp4a.40.2' # Put described audio at the beginning of the list, so that it # isn't chosen by default, as most people won't want it. 
- if 'descriptive' in format['format_id'].lower(): - format['preference'] = -2 + if 'descriptive' in fmt['format_id'].lower(): + fmt['preference'] = -2 return { 'id': video_id, @@ -670,7 +667,7 @@ class CBCGemLiveIE(InfoExtractor): 'title': r're:^Ottawa [0-9\-: ]+', 'description': 'The live TV channel and local programming from Ottawa', 'live_status': 'is_live', - 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*' + 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*', }, 'params': {'skip_download': True}, 'skip': 'Live might have ended', @@ -690,7 +687,7 @@ class CBCGemLiveIE(InfoExtractor): }, 'params': {'skip_download': True}, 'skip': 'Live might have ended', - } + }, ] def _real_extract(self, url): @@ -729,5 +726,5 @@ def _real_extract(self, url): 'description': 'description', 'thumbnail': ('images', 'card', 'url'), 'timestamp': ('airDate', {parse_iso8601}), - }) + }), } diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index aca9782c76d4..e8255889724a 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -31,7 +31,7 @@ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): return subtitles def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_info): - tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id) + tp_path = f'dJ5BDC/media/guid/{mpx_acc}/{content_id}' tp_release_url = f'https://link.theplatform.com/s/{tp_path}' info = self._extract_theplatform_metadata(tp_path, content_id) @@ -41,7 +41,7 @@ def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_inf try: tp_formats, tp_subtitles = self._extract_theplatform_smil( update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data' % asset_type) + f'Downloading {asset_type} SMIL data') except ExtractorError as e: last_e = e if asset_type != 'fallback': @@ -50,7 +50,7 @@ def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_inf try: tp_formats, 
tp_subtitles = self._extract_theplatform_smil( update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data, trying again with another format' % asset_type) + f'Downloading {asset_type} SMIL data, trying again with another format') except ExtractorError as e: last_e = e continue diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py index ca6b82c981c5..1d781cc477e9 100644 --- a/yt_dlp/extractor/ccc.py +++ b/yt_dlp/extractor/ccc.py @@ -25,7 +25,7 @@ class CCCIE(InfoExtractor): 'timestamp': 1388188800, 'duration': 3710, 'tags': list, - } + }, }, { 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', 'only_matching': True, @@ -35,7 +35,7 @@ def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id') - event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id) + event_data = self._download_json(f'https://media.ccc.de/public/events/{event_id}', event_id) formats = [] for recording in event_data.get('recordings', []): @@ -96,7 +96,7 @@ class CCCPlaylistIE(InfoExtractor): 'title': 'Datenspuren 2023', 'id': 'DS2023', }, - 'playlist_count': 37 + 'playlist_count': 37, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py index ab840f3016be..ffe4b49c15d9 100644 --- a/yt_dlp/extractor/ccma.py +++ b/yt_dlp/extractor/ccma.py @@ -24,7 +24,7 @@ class CCMAIE(InfoExtractor): 'timestamp': 1478608140, 'upload_date': '20161108', 'age_limit': 0, - } + }, }, { 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', 'md5': 'fa3e38f269329a278271276330261425', @@ -37,7 +37,7 @@ class CCMAIE(InfoExtractor): 'timestamp': 1494622500, 'vcodec': 'none', 'categories': ['Esports'], - } + }, }, { 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/', 'md5': 
'b43c3d3486f430f3032b5b160d80cbc3', @@ -51,7 +51,7 @@ class CCMAIE(InfoExtractor): 'subtitles': 'mincount:4', 'age_limit': 16, 'series': 'Crims', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py index 8552ee511cb2..18c080df1bc0 100644 --- a/yt_dlp/extractor/cctv.py +++ b/yt_dlp/extractor/cctv.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, try_get, @@ -167,17 +166,17 @@ def _real_extract(self, url): if isinstance(video, dict): for quality, chapters_key in enumerate(('lowChapters', 'chapters')): video_url = try_get( - video, lambda x: x[chapters_key][0]['url'], compat_str) + video, lambda x: x[chapters_key][0]['url'], str) if video_url: formats.append({ 'url': video_url, 'format_id': 'http', 'quality': quality, # Sample clip - 'preference': -10 + 'preference': -10, }) - hls_url = try_get(data, lambda x: x['hls_url'], compat_str) + hls_url = try_get(data, lambda x: x['hls_url'], str) if hls_url: hls_url = re.sub(r'maxbr=\d+&?', '', hls_url) formats.extend(self._extract_m3u8_formats( diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 0a5a524c16ae..62ee8b17f1e7 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -6,9 +6,10 @@ import json import random import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_ord, compat_urllib_parse_unquote +from ..compat import compat_ord from ..utils import ( ExtractorError, float_or_none, @@ -51,7 +52,7 @@ class CDAIE(InfoExtractor): 'age_limit': 0, 'upload_date': '20160221', 'timestamp': 1456078244, - } + }, }, { 'url': 'http://www.cda.pl/video/57413289', 'md5': 'a88828770a8310fc00be6c95faf7f4d5', @@ -67,7 +68,7 @@ class CDAIE(InfoExtractor): 'age_limit': 0, 'upload_date': '20160220', 'timestamp': 1455968218, - } + }, }, { # Age-restricted with vfilm redirection 'url': 'https://www.cda.pl/video/8753244c4', @@ -85,7 +86,7 
@@ class CDAIE(InfoExtractor): 'average_rating': float, 'timestamp': 1633888264, 'upload_date': '20211010', - } + }, }, { # Age-restricted without vfilm redirection 'url': 'https://www.cda.pl/video/17028157b8', @@ -103,7 +104,7 @@ class CDAIE(InfoExtractor): 'average_rating': float, 'timestamp': 1699705901, 'upload_date': '20231111', - } + }, }, { 'url': 'http://ebd.cda.pl/0x0/5749950c', 'only_matching': True, @@ -263,7 +264,7 @@ def _web_extract(self, video_id): def decrypt_file(a): for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'): a = a.replace(p, '') - a = compat_urllib_parse_unquote(a) + a = urllib.parse.unquote(a) b = [] for c in a: f = compat_ord(c) @@ -280,16 +281,16 @@ def decrypt_file(a): def extract_format(page, version): json_str = self._html_search_regex( r'player_data=(\\?["\'])(?P.+?)\1', page, - '%s player_json' % version, fatal=False, group='player_data') + f'{version} player_json', fatal=False, group='player_data') if not json_str: return player_data = self._parse_json( - json_str, '%s player_data' % version, fatal=False) + json_str, f'{version} player_data', fatal=False) if not player_data: return video = player_data.get('video') if not video or 'file' not in video: - self.report_warning('Unable to extract %s version information' % version) + self.report_warning(f'Unable to extract {version} version information') return if video['file'].startswith('uggc'): video['file'] = codecs.decode(video['file'], 'rot_13') @@ -310,11 +311,11 @@ def extract_format(page, version): continue data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2, 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]} - data = json.dumps(data).encode('utf-8') + data = json.dumps(data).encode() video_url = self._download_json( f'https://www.cda.pl/video/{video_id}', video_id, headers={ 'Content-Type': 'application/json', - 'X-Requested-With': 'XMLHttpRequest' + 'X-Requested-With': 'XMLHttpRequest', }, data=data, note=f'Fetching {quality} 
url', errnote=f'Failed to fetch {quality} url', fatal=False) if try_get(video_url, lambda x: x['result']['status']) == 'ok': @@ -322,7 +323,7 @@ def extract_format(page, version): info_dict['formats'].append({ 'url': video_url, 'format_id': quality, - 'height': int_or_none(quality[:-1]) + 'height': int_or_none(quality[:-1]), }) if not info_dict['duration']: @@ -340,11 +341,11 @@ def extract_format(page, version): webpage = handler( urljoin(self._BASE_URL, href), video_id, - 'Downloading %s version information' % resolution, fatal=False) + f'Downloading {resolution} version information', fatal=False) if not webpage: # Manually report warning because empty page is returned when # invalid version is requested. - self.report_warning('Unable to download %s version information' % resolution) + self.report_warning(f'Unable to download {resolution} version information') continue extract_format(webpage, resolution) diff --git a/yt_dlp/extractor/cellebrite.py b/yt_dlp/extractor/cellebrite.py index 9896a31afe5d..e90365a8beb7 100644 --- a/yt_dlp/extractor/cellebrite.py +++ b/yt_dlp/extractor/cellebrite.py @@ -14,7 +14,7 @@ class CellebriteIE(InfoExtractor): 'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED', 'duration': 455, 'tags': [], - } + }, }, { 'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/', 'info_dict': { @@ -25,7 +25,7 @@ class CellebriteIE(InfoExtractor): 'description': 'md5:e9a3d124c7287b0b07bad2547061cacf', 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png', 'title': 'Android Extractions Explained', - } + }, }] def _get_formats_and_subtitles(self, json_data, display_id): diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 5d6335729629..c323985caf2a 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ 
-1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse from ..networking import Request from ..utils import ( ExtractorError, @@ -97,11 +97,11 @@ class CeskaTelevizeIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) webpage, urlh = self._download_webpage_handle(url, playlist_id) - parsed_url = compat_urllib_parse_urlparse(urlh.url) + parsed_url = urllib.parse.urlparse(urlh.url) site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') playlist_title = self._og_search_title(webpage, default=None) if site_name and playlist_title: - playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0] + playlist_title = re.split(rf'\s*[—|]\s*{site_name}', playlist_title, maxsplit=1)[0] playlist_description = self._og_search_description(webpage, default=None) if playlist_description: playlist_description = playlist_description.replace('\xa0', ' ') @@ -122,15 +122,15 @@ def _real_extract(self, url): iframe_hash = self._download_webpage( 'https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id, note='Getting IFRAME hash') - query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } + query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec} webpage = self._download_webpage( 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id, note='Downloading player', query=query) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' - if '%s

' % NOT_AVAILABLE_STRING in webpage: + if f'{NOT_AVAILABLE_STRING}

' in webpage: self.raise_geo_restricted(NOT_AVAILABLE_STRING) - if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): + if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen')): raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) type_ = None @@ -183,7 +183,7 @@ def _real_extract(self, url): if playlist_url == 'error_region': raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - req = Request(compat_urllib_parse_unquote(playlist_url)) + req = Request(urllib.parse.unquote(playlist_url)) req.headers['Referer'] = url playlist = self._download_json(req, playlist_id, fatal=False) @@ -203,11 +203,11 @@ def _real_extract(self, url): if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', 'm3u8_native', - m3u8_id='hls-%s' % format_id, fatal=False) + m3u8_id=f'hls-{format_id}', fatal=False) else: stream_formats = self._extract_mpd_formats( stream_url, playlist_id, - mpd_id='dash-%s' % format_id, fatal=False) + mpd_id=f'dash-{format_id}', fatal=False) if 'drmOnly=true' in stream_url: for f in stream_formats: f['has_drm'] = True @@ -236,7 +236,7 @@ def _real_extract(self, url): if playlist_len == 1: final_title = playlist_title or title else: - final_title = '%s (%s)' % (playlist_title, title) + final_title = f'{playlist_title} ({title})' entries.append({ 'id': item_id, @@ -261,7 +261,7 @@ def _get_subtitles(self, episode_id, subs): 'cs': [{ 'ext': 'srt', 'data': srt_subs, - }] + }], } @staticmethod @@ -282,7 +282,7 @@ def _fix_subtitle(subtitle): if m: yield m.group(1) start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) - yield '{0} --> {1}'.format(start, stop) + yield f'{start} --> {stop}' else: yield line diff --git a/yt_dlp/extractor/cgtn.py b/yt_dlp/extractor/cgtn.py index 5d9d9bcde710..b9757e06395c 100644 --- a/yt_dlp/extractor/cgtn.py +++ 
b/yt_dlp/extractor/cgtn.py @@ -20,8 +20,8 @@ class CGTNIE(InfoExtractor): 'categories': ['Video'], }, 'params': { - 'skip_download': True - } + 'skip_download': True, + }, }, { 'url': 'https://news.cgtn.com/news/2021-06-06/China-Indonesia-vow-to-further-deepen-maritime-cooperation-10REvJCewCY/index.html', 'info_dict': { @@ -36,9 +36,9 @@ class CGTNIE(InfoExtractor): 'upload_date': '20210606', }, 'params': { - 'skip_download': False - } - } + 'skip_download': False, + }, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index 99dfcfdebb96..b49f741efaab 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -37,7 +37,7 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://chaturbate.com/%s/' % video_id, video_id, + f'https://chaturbate.com/{video_id}/', video_id, headers=self.geo_verification_headers()) found_m3u8_urls = [] @@ -85,7 +85,7 @@ def _real_extract(self, url): formats = [] for m3u8_url in m3u8_urls: for known_id in ('fast', 'slow'): - if '_%s' % known_id in m3u8_url: + if f'_{known_id}' in m3u8_url: m3u8_id = known_id break else: @@ -99,7 +99,7 @@ def _real_extract(self, url): return { 'id': video_id, 'title': video_id, - 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, + 'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg', 'age_limit': self._rta_search(webpage), 'is_live': True, 'formats': formats, diff --git a/yt_dlp/extractor/cinemax.py b/yt_dlp/extractor/cinemax.py index 706ec8553b98..66831ef62d23 100644 --- a/yt_dlp/extractor/cinemax.py +++ b/yt_dlp/extractor/cinemax.py @@ -20,6 +20,6 @@ class CinemaxIE(HBOBaseIE): def _real_extract(self, url): path, video_id = self._match_valid_url(url).groups() - info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id) + info = self._extract_info(f'https://www.cinemax.com/{path}.xml', video_id) 
info['id'] = video_id return info diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py index 745b71f24399..834890d56f68 100644 --- a/yt_dlp/extractor/cinetecamilano.py +++ b/yt_dlp/extractor/cinetecamilano.py @@ -27,8 +27,8 @@ class CinetecaMilanoIE(InfoExtractor): 'modified_date': '20200520', 'duration': 3139, 'release_timestamp': 1643446208, - 'modified_timestamp': int - } + 'modified_timestamp': int, + }, }] def _real_extract(self, url): @@ -38,7 +38,7 @@ def _real_extract(self, url): f'https://www.cinetecamilano.it/api/catalogo/{video_id}/?', video_id, headers={ 'Referer': url, - 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or '' + 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or '', }) except ExtractorError as e: if ((isinstance(e.cause, HTTPError) and e.cause.status == 500) @@ -58,5 +58,5 @@ def _real_extract(self, url): 'modified_timestamp': parse_iso8601(archive.get('created_at'), delimiter=' '), 'thumbnail': urljoin(url, try_get(archive, lambda x: x['thumb']['src'].replace('/public/', '/storage/'))), 'formats': self._extract_m3u8_formats( - urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4') + urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4'), } diff --git a/yt_dlp/extractor/cineverse.py b/yt_dlp/extractor/cineverse.py index 4405297c62e0..c8c6c48c2700 100644 --- a/yt_dlp/extractor/cineverse.py +++ b/yt_dlp/extractor/cineverse.py @@ -13,7 +13,7 @@ class CineverseBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://www\.(?P%s)' % '|'.join(map(re.escape, ( + _VALID_URL_BASE = r'https?://www\.(?P{})'.format('|'.join(map(re.escape, ( 'cineverse.com', 'asiancrush.com', 'dovechannel.com', @@ -21,7 +21,7 @@ class CineverseBaseIE(InfoExtractor): 'midnightpulp.com', 'fandor.com', 'retrocrush.tv', - ))) + )))) class CineverseIE(CineverseBaseIE): @@ 
-38,7 +38,7 @@ class CineverseIE(CineverseBaseIE): 'duration': 5811.597, 'description': 'md5:892fd62a05611d394141e8394ace0bc6', 'age_limit': 13, - } + }, }, { 'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie', 'skip': 'geo-blocked', @@ -55,7 +55,7 @@ class CineverseIE(CineverseBaseIE): 'duration': 1485.067, 'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.', 'series': 'Space Adventure COBRA (Original Japanese)', - } + }, }] def _real_extract(self, url): @@ -104,7 +104,7 @@ class CineverseDetailsIE(CineverseBaseIE): 'info_dict': { 'title': 'Space Adventure COBRA (Original Japanese)', 'id': '1000000023012', - } + }, }, { 'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel', 'info_dict': { diff --git a/yt_dlp/extractor/ciscolive.py b/yt_dlp/extractor/ciscolive.py index 0668578170f8..1584ca6657b3 100644 --- a/yt_dlp/extractor/ciscolive.py +++ b/yt_dlp/extractor/ciscolive.py @@ -105,7 +105,7 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): @classmethod def suitable(cls, url): - return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url) + return False if CiscoLiveSessionIE.suitable(url) else super().suitable(url) @staticmethod def _check_bc_id_exists(rf_item): @@ -117,7 +117,7 @@ def _entries(self, query, url): for page_num in itertools.count(1): results = self._call_api( 'search', None, query, url, - 'Downloading search JSON page %d' % page_num) + f'Downloading search JSON page {page_num}') sl = try_get(results, lambda x: x['sectionList'][0], dict) if sl: results = sl diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 85585dffbb40..d39347c82c10 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -46,7 +46,7 @@ def _real_extract(self, url): headers['accessPwd'] = password stream, urlh = self._download_json_handle( - 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, 
video_id), + f'https://{subdomain}.webex.com/webappng/api/v1/recordings/{video_id}/stream', video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429)) if urlh.status == 403: @@ -101,6 +101,6 @@ def _real_extract(self, url): 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), 'timestamp': unified_timestamp(stream.get('createTime')), 'duration': int_or_none(stream.get('duration'), 1000), - 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), + 'webpage_url': f'https://{subdomain}.webex.com/recordingservice/sites/{siteurl}/recording/playback/{video_id}', 'formats': formats, } diff --git a/yt_dlp/extractor/cjsw.py b/yt_dlp/extractor/cjsw.py index c37a3b84828c..b80236a7ee14 100644 --- a/yt_dlp/extractor/cjsw.py +++ b/yt_dlp/extractor/cjsw.py @@ -27,7 +27,7 @@ class CJSWIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) program, episode_id = mobj.group('program', 'id') - audio_id = '%s/%s' % (program, episode_id) + audio_id = f'{program}/{episode_id}' webpage = self._download_webpage(url, episode_id) diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py index 67b56e00d9ab..393f21730809 100644 --- a/yt_dlp/extractor/clippit.py +++ b/yt_dlp/extractor/clippit.py @@ -23,7 +23,7 @@ class ClippitIE(InfoExtractor): 'upload_date': '20160826', 'description': 'BattleBots | ABC', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, } def _real_extract(self, url): @@ -36,7 +36,7 @@ def _real_extract(self, url): quality = qualities(FORMATS) formats = [] for format_id in FORMATS: - url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id, + url = self._html_search_regex(rf'data-{format_id}-file="(.+?)"', webpage, 'url', fatal=False) if not url: continue diff --git a/yt_dlp/extractor/cliprs.py b/yt_dlp/extractor/cliprs.py index c2add02da46f..42f78cac6586 100644 --- a/yt_dlp/extractor/cliprs.py +++ b/yt_dlp/extractor/cliprs.py 
@@ -15,7 +15,7 @@ class ClipRsIE(OnetBaseIE): 'duration': 229, 'timestamp': 1459850243, 'upload_date': '20160405', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py index 1f9a5f6114ea..77469eda997f 100644 --- a/yt_dlp/extractor/closertotruth.py +++ b/yt_dlp/extractor/closertotruth.py @@ -15,7 +15,7 @@ class CloserToTruthIE(InfoExtractor): 'title': 'Solutions to the Mind-Body Problem?', 'upload_date': '20140221', 'timestamp': 1392956007, - 'uploader_id': 'CTTXML' + 'uploader_id': 'CTTXML', }, 'params': { 'skip_download': True, @@ -29,7 +29,7 @@ class CloserToTruthIE(InfoExtractor): 'title': 'How do Brains Work?', 'upload_date': '20140221', 'timestamp': 1392956024, - 'uploader_id': 'CTTXML' + 'uploader_id': 'CTTXML', }, 'params': { 'skip_download': True, @@ -69,7 +69,7 @@ def _real_extract(self, url): entry_ids.add(entry_id) entries.append({ '_type': 'url_transparent', - 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'url': f'kaltura:{partner_id}:{entry_id}', 'ie_key': 'Kaltura', 'title': mobj.group('title'), }) @@ -83,7 +83,7 @@ def _real_extract(self, url): return { '_type': 'url_transparent', 'display_id': display_id, - 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'url': f'kaltura:{partner_id}:{entry_id}', 'ie_key': 'Kaltura', - 'title': title + 'title': title, } diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index a812c24af8ad..f902daacf66a 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -53,7 +53,7 @@ class CloudflareStreamIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' - base_url = 'https://%s/%s/' % (domain, video_id) + base_url = f'https://{domain}/{video_id}/' if '.' 
in video_id: video_id = self._parse_json(base64.urlsafe_b64decode( video_id.split('.')[1] + '==='), video_id)['sub'] diff --git a/yt_dlp/extractor/cloudycdn.py b/yt_dlp/extractor/cloudycdn.py index e6e470e073e3..58bde466630a 100644 --- a/yt_dlp/extractor/cloudycdn.py +++ b/yt_dlp/extractor/cloudycdn.py @@ -22,7 +22,7 @@ class CloudyCDNIE(InfoExtractor): 'upload_date': '20231121', 'title': 'D23-6000-105_cetstud', 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg', - } + }, }, { 'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1', 'md5': '798828a479151e2444d8dcfbec76e482', @@ -34,7 +34,7 @@ class CloudyCDNIE(InfoExtractor): 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg', 'duration': 1205, 'upload_date': '20221130', - } + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/', @@ -47,7 +47,7 @@ class CloudyCDNIE(InfoExtractor): 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg', 'timestamp': 1677181513, 'title': 'LIB-2', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/clubic.py b/yt_dlp/extractor/clubic.py index 716f25969468..c908e61a1e36 100644 --- a/yt_dlp/extractor/clubic.py +++ b/yt_dlp/extractor/clubic.py @@ -18,7 +18,7 @@ class ClubicIE(InfoExtractor): 'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité', 'description': 're:Gueule de bois chez Nokia. 
Le constructeur a indiqué cette.*', 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$', - } + }, }, { 'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html', 'only_matching': True, @@ -27,7 +27,7 @@ class ClubicIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id + player_url = f'http://player.m6web.fr/v1/player/clubic/{video_id}.html' player_page = self._download_webpage(player_url, video_id) config = self._parse_json(self._search_regex( diff --git a/yt_dlp/extractor/clyp.py b/yt_dlp/extractor/clyp.py index 273d0025f09a..2702427c861b 100644 --- a/yt_dlp/extractor/clyp.py +++ b/yt_dlp/extractor/clyp.py @@ -58,13 +58,13 @@ def _real_extract(self, url): query['token'] = token metadata = self._download_json( - 'https://api.clyp.it/%s' % audio_id, audio_id, query=query) + f'https://api.clyp.it/{audio_id}', audio_id, query=query) formats = [] for secure in ('', 'Secure'): for ext in ('Ogg', 'Mp3'): - format_id = '%s%s' % (secure, ext) - format_url = metadata.get('%sUrl' % format_id) + format_id = f'{secure}{ext}' + format_url = metadata.get(f'{format_id}Url') if format_url: formats.append({ 'url': format_url, diff --git a/yt_dlp/extractor/cmt.py b/yt_dlp/extractor/cmt.py index 6359102aa500..8e53b7fbf87f 100644 --- a/yt_dlp/extractor/cmt.py +++ b/yt_dlp/extractor/cmt.py @@ -1,6 +1,6 @@ from .mtv import MTVIE -# TODO Remove - Reason: Outdated Site +# TODO: Remove - Reason: Outdated Site class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE @@ -52,4 +52,4 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) mgid = self._extract_mgid(webpage, url) - return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) + return self.url_result(f'http://media.mtvnservices.com/embed/{mgid}') diff --git a/yt_dlp/extractor/cnn.py 
b/yt_dlp/extractor/cnn.py index 61b62fae9f7e..fe7615a89148 100644 --- a/yt_dlp/extractor/cnn.py +++ b/yt_dlp/extractor/cnn.py @@ -26,7 +26,7 @@ class CNNIE(TurnerBaseIE): 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', 'ext': 'mp4', 'title': "Student's epic speech stuns new freshmen", - 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", + 'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."', 'upload_date': '20130821', }, 'expected_warnings': ['Failed to download m3u8 information'], @@ -161,7 +161,7 @@ class CNNIndonesiaIE(InfoExtractor): 'release_timestamp': 1662859088, 'release_date': '20220911', 'uploader': 'Asfahan Yahsyi', - } + }, }, { 'url': 'https://www.cnnindonesia.com/internasional/20220911104341-139-846189/video-momen-charles-disambut-meriah-usai-dilantik-jadi-raja-inggris', 'info_dict': { @@ -178,7 +178,7 @@ class CNNIndonesiaIE(InfoExtractor): 'release_date': '20220911', 'uploader': 'REUTERS', 'release_timestamp': 1662869995, - } + }, }] def _real_extract(self, url): @@ -194,5 +194,5 @@ def _real_extract(self, url): '_type': 'url_transparent', 'url': embed_url, 'upload_date': upload_date, - 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')) + 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')), }) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1d2c443c0b75..2799747ece60 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -60,7 +60,6 @@ determine_ext, dict_get, encode_data_uri, - error_to_compat_str, extract_attributes, filter_dict, fix_xml_ampersands, @@ -767,8 +766,8 @@ def __maybe_fake_ip_and_retry(self, countries): self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._x_forwarded_for_ip: self.report_warning( - 'Video 
is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.' - % (self._x_forwarded_for_ip, country_code.upper())) + 'Video is geo restricted. Retrying extraction with fake IP ' + f'{self._x_forwarded_for_ip} ({country_code.upper()}) as X-Forwarded-For.') return True return False @@ -841,7 +840,7 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa if not self._downloader._first_webpage_request: sleep_interval = self.get_param('sleep_interval_requests') or 0 if sleep_interval > 0: - self.to_screen('Sleeping %s seconds ...' % sleep_interval) + self.to_screen(f'Sleeping {sleep_interval} seconds ...') time.sleep(sleep_interval) else: self._downloader._first_webpage_request = False @@ -898,7 +897,7 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa if errnote is None: errnote = 'Unable to download webpage' - errmsg = f'{errnote}: {error_to_compat_str(err)}' + errmsg = f'{errnote}: {err}' if fatal: raise ExtractorError(errmsg, cause=err) else: @@ -987,7 +986,7 @@ def __check_blocked(self, content): r'