Skip to content
This repository has been archived by the owner on Sep 24, 2021. It is now read-only.

Playlist interactive #36

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
3 changes: 2 additions & 1 deletion acfun.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def acfun_download_by_id(id, title, merge=True):
with open(title + '.json', 'w') as x:
x.write(srt)

def acfun_download(url, merge=True):
def acfun_download(url, config):
merge = config["merge"]
assert re.match(r'http://www.acfun.tv/v/ac(\d+)', url)
html = get_html(url).decode('utf-8')

Expand Down
5 changes: 3 additions & 2 deletions bilibili.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def parse_srt_p(p):
# pool 0: normal
# pool 1: srt
# pool 2: special?

font_size = int(font_size)

font_color = '#%06x' % int(font_color)
Expand Down Expand Up @@ -61,7 +61,8 @@ def bilibili_download_by_cid(id, title, merge=True):
else:
raise NotImplementedError(urls[0])

def bilibili_download(url, merge=True):
def bilibili_download(url, config):
merge = config["merge"]
assert re.match(r'http://(www.bilibili.tv|bilibili.kankanews.com|bilibili.smgbb.cn)/video/av(\d+)', url)
html = get_html(url)

Expand Down
3 changes: 2 additions & 1 deletion cntv.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def cntv_download_by_id(id, title=None, output_dir='.', merge=True):
assert ext in ('flv', 'mp4')
download_urls(urls, title, str(ext), total_size=None, merge=merge)

def cntv_download(url, merge=True):
def cntv_download(url, config):
merge = config["merge"]
if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url):
id = r1(r'<!--repaste.video.code.begin-->(\w+)<!--repaste.video.code.end-->', get_html(url))
elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url):
Expand Down
83 changes: 78 additions & 5 deletions common.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,72 @@ def url_size(url):
def urls_size(urls):
return sum(map(url_size, urls))

def _parse_selection(text):
    """Parse a selection such as "1,3-5,8" into (start, end) number pairs.

    Each comma-separated item is either a single number N, returned as
    (N, N), or a range "A-B", returned as (A, B). Numbers are returned
    exactly as typed (1-based, not validated against any list length).

    Raises ValueError if any item is not a number or a range of numbers.
    """
    ranges = []
    for item in text.split(","):
        start, _, end = item.strip().partition("-")
        start = int(start.strip())
        # A bare number N (or "N-") is the one-element range N-N.
        end = int(end.strip()) if end else start
        ranges.append((start, end))
    return ranges


def select_playlist_info(alist):
    """Interactively select items from a list of dicts with a "title" key.

    Repeatedly prints the numbered titles (selected entries are marked
    with "*") and reads a command from standard input:

      * numbers and ranges separated by commas (e.g. "1,3-5,8") toggle
        the selection state of the named entries;
      * "a"/"all" selects everything, "n"/"none" clears the selection;
      * "h"/"help"/"?" prints a short help text;
      * "q"/"quit"/"finish"/"exit"/"bye" ends the dialogue.

    Numbers outside 1..len(alist) are ignored. Input that cannot be parsed
    leaves the selection untouched and prints a short notice.

    Returns the selected items of ``alist``, in their original order.
    """
    # Works on Python 2 (raw_input) and on Python 3, where input() has the
    # semantics of the old raw_input().
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    selected = set()
    total = len(alist)
    while True:
        print("\nAvailable videos:")
        for i, vid_info in enumerate(alist):
            tag = "*" if i in selected else " "
            print("%s%d) %s" % (tag, i + 1, vid_info["title"]))
        print("Total: %d" % total)
        resp = read_line("video numbers (h = HELP): ")
        resp = resp.strip().strip(",").lower()
        if not resp:
            continue
        if resp in ("?", "h", "help"):
            print('=============== HELP ====================')
            print('Select number and press <Enter>.')
            print('Separate numbers by ","; use range like: 3-5.')
            print('Example: 1,3-5,8')
            print('a = all; n = none; q = finish; h = help')
            print('============= HELP END ==================')
            read_line("Press <Enter> to continue...")
            continue
        if resp in ("a", "all"):
            selected = set(range(total))
            continue
        if resp in ("n", "none"):
            selected = set()
            continue
        if resp in ("q", "quit", "finish", "exit", "bye"):
            break

        # Parse the whole line before touching the selection, so a typo in
        # one item never leaves the selection half-updated.
        try:
            ranges = _parse_selection(resp)
        except ValueError:
            print("Invalid selection: %r (h = HELP)" % resp)
            continue

        for start, end in ranges:
            # User numbers are 1-based. Clamp both ends to valid indices so
            # that 0 or oversized numbers cannot add bogus entries.
            first = max(start - 1, 0)
            stop = min(end, total)
            # Toggle: selected entries are dropped, unselected ones added.
            selected.symmetric_difference_update(range(first, stop))

    return [info for i, info in enumerate(alist) if i in selected]

class SimpleProgressBar:
def __init__(self, total_size, total_pieces=1):
self.displayed = False
Expand Down Expand Up @@ -247,9 +313,9 @@ def f(*args, **kwargs):

def script_main(script_name, download, download_playlist=None):
if download_playlist:
help = 'python %s.py [--playlist] [-c|--create-dir] [--no-merge] url ...' % script_name
short_opts = 'hc'
opts = ['help', 'playlist', 'create-dir', 'no-merge']
help = 'python %s.py [--playlist] [-c|--create-dir] [--no-merge] [-i|--interactive] url ...' % script_name
short_opts = 'hci'
opts = ['help', 'playlist', 'create-dir', 'no-merge', 'interactive']
else:
help = 'python [--no-merge] %s.py url ...' % script_name
short_opts = 'h'
Expand All @@ -261,6 +327,9 @@ def script_main(script_name, download, download_playlist=None):
print help
sys.exit(1)
playlist = False

config = {}
config["interactive"] = False
create_dir = False
merge = True
for o, a in opts:
Expand All @@ -273,16 +342,20 @@ def script_main(script_name, download, download_playlist=None):
create_dir = True
elif o in ('--no-merge'):
merge = False
elif o in ('-i', '--interactive'):
config["interactive"] = True
else:
print help
sys.exit(1)
if not args:
print help
sys.exit(1)

config["create_dir"] = create_dir
config["merge"] = merge
for url in args:
if playlist:
download_playlist(url, create_dir=create_dir, merge=merge)
download_playlist(url, config=config)
else:
download(url, merge=merge)
download(url, config=config)

3 changes: 2 additions & 1 deletion iask.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def iask_download_by_id(id, title=None, merge=True):
assert title
download_urls(urls, title, 'flv', total_size=None, merge=merge)

def iask_download(url, merge=True):
def iask_download(url, config):
merge = config["merge"]
id = r1(r'vid:(\d+),', get_html(url))
iask_download_by_id(id, merge=merge)

Expand Down
3 changes: 2 additions & 1 deletion ifeng.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def ifeng_download_by_id(id, title=None, merge=True):
assert url.endswith('.mp4')
download_urls([url], title, 'mp4', total_size=None, merge=merge)

def ifeng_download(url, merge=True):
def ifeng_download(url, config):
merge = config["merge"]
id = r1(r'/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.shtml$', url)
if id:
return ifeng_download_by_id(id)
Expand Down
3 changes: 2 additions & 1 deletion iqiyi.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ def real_url(url):
import json
return json.loads(get_html(url[:-3]+'hml?v='+str(int(time.time()) + 1921658928)))['l'] # XXX: what is 1921658928?

def iqiyi_download(url, merge=True):
def iqiyi_download(url, config):
merge = config["merge"]
html = get_html(url)
#title = r1(r'title\s*:\s*"([^"]+)"', html)
#title = unescape_html(title).decode('utf-8')
Expand Down
3 changes: 2 additions & 1 deletion ku6.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def ku6_download_by_id(id, title=None, output_dir='.', merge=True):
ext = {'f4v':'flv'}.get(ext, ext)
download_urls(urls, title, ext, total_size=size, merge=merge)

def ku6_download(url, merge=True):
def ku6_download(url, config):
merge = config["merge"]
id = r1(r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html', url)
ku6_download_by_id(id, merge=merge)

Expand Down
3 changes: 2 additions & 1 deletion pptv.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ def pptv_download_by_id(id, merge=True):
assert rid.endswith('.mp4')
download_urls(urls, title, 'mp4', total_size=total_size, merge=merge)

def pptv_download(url, merge=True):
def pptv_download(url, config):
assert re.match(r'http://v.pptv.com/show/(\w+)\.html$', url)
merge = config["merge"]
html = get_html(url)
id = r1(r'webcfg\s*=\s*{"id":\s*(\d+)', html)
assert id
Expand Down
3 changes: 2 additions & 1 deletion qq.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import re
from common import *

def qq_download_by_id(id, title, merge=True):
def qq_download_by_id(id, title, config):
url = 'http://vsrc.store.qq.com/%s.flv' % id
assert title
merge = config["merge"]
download_urls([url], title, 'flv', total_size=None, merge=merge)

3 changes: 2 additions & 1 deletion sohu.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ def real_url(host, prot, file, new):
start, _, host, key, _, _ = get_html(url).split('|')
return '%s%s?key=%s' % (start[:-1], new, key)

def sohu_download(url, merge=True):
def sohu_download(url, config):
merge = config["merge"]
vid = r1('vid="(\d+)"', get_html(url))
assert vid
import json
Expand Down
7 changes: 5 additions & 2 deletions tudou.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ def tudou_download_by_id(id, title, merge=True):
iid = r1(r'iid\s*=\s*(\S+)', html)
tudou_download_by_iid(iid, title, merge=merge)

def tudou_download(url, merge=True):
def tudou_download(url, config):
merge = config["merge"]
html = get_decoded_html(url)
iid = r1(r'iid\s*[:=]\s*(\d+)', html)
assert iid
Expand Down Expand Up @@ -58,7 +59,9 @@ def parse_playlist(url):
url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]

def tudou_download_playlist(url, create_dir=False, merge=True):
def tudou_download_playlist(url, config):
merge = config["merge"]
create_dir = config["create_dir"]
if create_dir:
raise NotImplementedError('please report a bug so I can implement this')
videos = parse_playlist(url)
Expand Down
8 changes: 4 additions & 4 deletions video_lixian.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ def url_to_module(url):
else:
raise NotImplementedError(url)

def any_download(url, merge=True):
def any_download(url, config):
m = url_to_module(url)
m.download(url, merge=merge)
m.download(url, config=config)

def any_download_playlist(url, create_dir=False, merge=True):
def any_download_playlist(url, config):
m = url_to_module(url)
m.download_playlist(url, create_dir=create_dir, merge=merge)
m.download_playlist(url, config=config)

def main():
script_main('video_lixian', any_download, any_download_playlist)
Expand Down
3 changes: 2 additions & 1 deletion w56.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def w56_download_by_id(id, title=None, output_dir='.', merge=True):
assert ext in ('flv', 'mp4')
download_urls([url], title, str(ext), total_size=size, merge=merge)

def w56_download(url, merge=True):
def w56_download(url, config):
merge = config["merge"]
id = r1(r'http://www.56.com/u\d+/v_(\w+).html', url)
w56_download_by_id(id, merge=merge)

Expand Down
3 changes: 2 additions & 1 deletion yinyuetai.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def yinyuetai_download_by_id(id, title=None, merge=True):
ext, size = url_info(url)
download_urls([url], title, ext, total_size=size, merge=merge)

def yinyuetai_download(url, merge=True):
def yinyuetai_download(url, config):
merge = config["merge"]
id = r1(r'http://www.yinyuetai.com/video/(\d+)$', url)
assert id
html = get_html(url, 'utf-8')
Expand Down
72 changes: 64 additions & 8 deletions youku.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ def parse_page(url):
page = get_html(url)
id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", page).group(1)
title = parse_video_title(url, page)
if type(title) == unicode:
title = title.encode(default_encoding)
title = title.replace('?', '-')
return id2, title

def get_info(videoId2):
Expand Down Expand Up @@ -127,11 +130,9 @@ def youku_download_by_id(id2, title, output_dir='.', stream_type=None, merge=Tru
total_size = sum(sizes)
download_urls(urls, title, file_type_of_url(urls[0]), total_size, output_dir, merge=merge)

def youku_download(url, output_dir='', stream_type=None, merge=True):
def youku_download(url, config, output_dir='', stream_type=None):
merge = config["merge"]
id2, title = parse_page(url)
if type(title) == unicode:
title = title.encode(default_encoding)
title = title.replace('?', '-')
youku_download_by_id(id2, title, output_dir, merge=merge)

def parse_playlist_videos(html):
Expand Down Expand Up @@ -167,7 +168,10 @@ def parse_vplaylist(url):
n = int(re.search(r'<span class="num">(\d+)</span>', get_html(url)).group(1))
return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)]

def youku_download_playlist(url, create_dir=False, merge=True):
def youku_download_playlist(url, config):
create_dir = config["create_dir"]
print("Collecting video IDs for the playlist...")

if re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
url = find_video_id_from_show_page(url)
if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url):
Expand All @@ -188,9 +192,61 @@ def youku_download_playlist(url, create_dir=False, merge=True):
if not os.path.exists(title):
os.makedirs(title)
output_dir = title
for i, id in enumerate(ids):
print 'Downloading %s of %s videos...' % (i + 1, len(ids))
youku_download(id, output_dir=output_dir, merge=merge)

ids_len = len(ids)
if not config["interactive"]:
for i, id in enumerate(ids):
print 'Downloading %s of %s videos...' % (i + 1, len(ids))
youku_download(id, config=config, output_dir=output_dir)
else:
ids_info = []
ids_map = {}
print("Collecting video titles...")

# download video title in threads
import threading
ids_map_lock = threading.Lock()

def get_title(idx, avid):
"""Download thread title and save it"""
id2, title = parse_page(avid)
vid_info = {"id":id2, "title": title}
ids_map_lock.acquire()
ids_map[avid] = vid_info
print("[%d/%d] %s: %s" % (idx+1, ids_len, id2, title))
ids_map_lock.release()

args_list = list(enumerate(ids))
args_list.reverse()
thread_list = set()
conns = 5 # number of cocurrent threads
while len(thread_list) > 0 or len(args_list) > 0:
# start some threads
while len(thread_list) < conns and len(args_list) > 0:
args = args_list.pop()
t = threading.Thread(target=get_title, args=args)
thread_list.add(t)
t.start()

# remove finished threads
t_done = []
for t1 in thread_list:
t1.join(.1)
if not t1.isAlive():
t_done.append(t1)
for t1 in t_done:
thread_list.remove(t1)

# restore ids order
for i in ids:
ids_info.append(ids_map[i])

selected_ids = select_playlist_info(ids_info)
selected_len = len(selected_ids)
for i, vid_info in enumerate(selected_ids):
print 'Downloading %s of %s videos...' % (i + 1, selected_len)
youku_download_by_id(vid_info["id"], vid_info["title"],
output_dir, merge=config["merge"])

download = youku_download
download_playlist = youku_download_playlist
Expand Down