[extractor/common] add generic support for akamai http format extraction

ytdl-org · Nov 22, 2020 · 193422e · 193422e · nixxo · Dec 2, 2020
1 parent c4cabf0
commit 193422e
Showing 1 changed file with 27 additions and 0 deletions.
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -2596,6 +2596,7 @@ def _media_formats(src, cur_media_type, type_info={}):
 
     def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
         formats = []
+
         hdcore_sign = 'hdcore=3.7.0'
         f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
         hds_host = hosts.get('hds')
@@ -2608,13 +2609,39 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
         for entry in f4m_formats:
             entry.update({'extra_param_to_segment_url': hdcore_sign})
         formats.extend(f4m_formats)
+
         m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
         hls_host = hosts.get('hls')
         if hls_host:
             m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
         formats.extend(self._extract_m3u8_formats(
             m3u8_url, video_id, 'mp4', 'm3u8_native',
             m3u8_id='hls', fatal=False))
+
+        http_host = hosts.get('http')
+        if http_host and 'hdnea=' not in manifest_url:
+            REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
+            qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
+            qualities_length = len(qualities)
+            if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
+                i = 0
+                http_formats = []
+                for f in formats:
+                    if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
+                        for protocol in ('http', 'https'):
+                            http_f = f.copy()
+                            del http_f['manifest_url']
+                            http_url = re.sub(
+                                REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
+                            http_f.update({
+                                'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
+                                'url': http_url,
+                                'protocol': protocol,
+                            })
+                            http_formats.append(http_f)
+                        i += 1
+                formats.extend(http_formats)
+
         return formats
 
     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):