-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathted_talk_downloader.py
55 lines (34 loc) · 1.32 KB
/
ted_talk_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# TED Talk video downloader script written in Python.
import requests # for getting content
from bs4 import BeautifulSoup # webscraping for urls to download
import re # pattern matching
import sys # passing arguments
# Pass the talk page URL as the first command-line argument, e.g.
#   python ted_talk_downloader.py https://www.ted.com/talks/eleni_myrivili_a_3_part_plan_to_take_on_extreme_heat_waves
# When no argument is given, the default talk below is downloaded.
url = "https://www.ted.com/talks/ayana_elizabeth_johnson_how_to_find_joy_in_climate_action"

# Seconds to wait on the server before giving up; requests applies no
# timeout by default, so a stalled connection would otherwise hang forever.
REQUEST_TIMEOUT = 30

# Bytes written per chunk while streaming the video to disk.
CHUNK_SIZE = 64 * 1024

# A direct link to an .mp4 file.  The match is non-greedy and stops at
# whitespace, quotes, backslashes and angle brackets so that it cannot run
# across several URLs packed into one (possibly escaped) JSON blob.
_MP4_URL_RE = re.compile(r"""https?://[^\s"'\\<>]+?\.mp4\b""")


def extract_mp4_url(script_texts):
    """Return the first .mp4 URL found in the given script sources.

    Scripts are searched from last to first, because the original script
    only ever looked at the last ``<script>`` tag mentioning "https";
    earlier tags now act as a fallback instead of causing a crash.

    Args:
        script_texts: iterable of strings, one per ``<script>`` tag.

    Returns:
        The matched URL (including its ``.mp4`` suffix), or ``None`` when
        no script contains one.
    """
    for text in reversed(list(script_texts)):
        match = _MP4_URL_RE.search(text)
        if match is not None:
            return match.group(0)
    return None


def build_file_name(mp4_url):
    """Derive a local file name from the video URL.

    Only the last path segment is used, so the result never contains a
    directory separator.  That segment is split on "-" and, as in the
    original script, the first piece and the last two pieces are dropped
    (presumably an event tag and encoding tags -- confirm against real
    URLs); the remaining pieces are joined with "_".  If nothing remains,
    the segment itself is used so the name is never just ".mp4".

    Args:
        mp4_url: URL of the video file.

    Returns:
        A file name ending in ``.mp4``.
    """
    base = mp4_url.split("?", 1)[0].rstrip("/").rsplit("/", 1)[-1]
    stem = base[:-4] if base.lower().endswith(".mp4") else base
    words = [piece for piece in stem.split("-")[1:-2] if piece]
    name = "_".join(words) if words else stem
    # Replace anything that is awkward in a file name on common systems.
    name = re.sub(r"[^\w.-]", "_", name)
    return (name or "ted_talk") + ".mp4"


def download(mp4_url, file_name):
    """Stream *mp4_url* into *file_name* without buffering it all in memory.

    Raises:
        requests.RequestException: on connection problems or an HTTP
            error status.
    """
    with requests.get(mp4_url, stream=True, timeout=REQUEST_TIMEOUT) as video:
        # Fail loudly instead of saving an HTML error page as "video".
        video.raise_for_status()
        with open(file_name, 'wb') as f:
            for chunk in video.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)


def main(argv=None):
    """Download the TED Talk video behind the given (or default) page URL.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  The first argument, if any, is the talk
            page URL; otherwise the module-level ``url`` is used.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    page_url = args[0] if args else url

    # get the content of the url
    try:
        r = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
    except requests.RequestException as err:
        sys.exit(f'Error! Could not fetch {page_url}: {err}')

    print('Download about to start!...')

    # scraping details about the video.
    soup = BeautifulSoup(r.content, features='lxml')
    mp4_url = extract_mp4_url(str(val) for val in soup.find_all("script"))
    if mp4_url is None:
        sys.exit('Error! No mp4 link found on ' + page_url)

    print('Downloading video from ' + mp4_url)

    file_name = build_file_name(mp4_url)
    print(f'file name: {file_name}')

    print(mp4_url)
    print('Downloading the file')
    try:
        download(mp4_url, file_name)
    except requests.RequestException as err:
        sys.exit(f'Error! Download failed: {err}')

    print('Download Complete..!')


if __name__ == "__main__":
    main()