Skip to content

Commit

Permalink
[ie/tiktok] Fix and deprioritize JSON subtitles (#10516)
Browse files — browse the repository at this point in the history
Fixes regression caused by 5ce5824

Closes #10514
Authored by: bashonly
  • Loading branch information
bashonly authored Jul 23, 2024
1 parent 713b4cd commit 2f97779
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions yt_dlp/extractor/tiktok.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,7 +23,6 @@
mimetype2ext,
parse_qs,
qualities,
remove_start,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
Expand Down Expand Up @@ -254,7 +253,16 @@ def _extract_web_data_and_status(self, url, video_id, fatal=True):

def _get_subtitles(self, aweme_detail, aweme_id, user_name):
# TODO: Extract text positioning info

EXT_MAP = { # From lowest to highest preference
'creator_caption': 'json',
'srt': 'srt',
'webvtt': 'vtt',
}
preference = qualities(tuple(EXT_MAP.values()))

subtitles = {}

# aweme/detail endpoint subs
captions_info = traverse_obj(
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
Expand All @@ -278,8 +286,8 @@ def _get_subtitles(self, aweme_detail, aweme_id, user_name):
if not caption.get('url'):
continue
subtitles.setdefault(caption.get('lang') or 'en', []).append({
'ext': remove_start(caption.get('caption_format'), 'web'),
'url': caption['url'],
'ext': EXT_MAP.get(caption.get('Format')),
})
# webpage subs
if not subtitles:
Expand All @@ -288,9 +296,14 @@ def _get_subtitles(self, aweme_detail, aweme_id, user_name):
self._create_url(user_name, aweme_id), aweme_id, fatal=False)
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
'ext': remove_start(caption.get('Format'), 'web'),
'url': caption['Url'],
'ext': EXT_MAP.get(caption.get('Format')),
})

# Deprioritize creator_caption json since it can't be embedded or used by media players
for lang, subs_list in subtitles.items():
subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext']))

return subtitles

def _parse_url_key(self, url_key):
Expand Down

0 comments on commit 2f97779

Please sign in to comment.