Skip to content

Commit

Permalink
Merge pull request #31 from hbmartin/playlist-addons
Browse files Browse the repository at this point in the history
Add last_update property to playlist
  • Loading branch information
hbmartin authored Feb 2, 2020
2 parents 090fc4a + c95bd40 commit 78f9f26
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 13 deletions.
39 changes: 27 additions & 12 deletions pytube/contrib/playlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
import logging
import re
from collections import OrderedDict
from datetime import date, datetime
from typing import List, Optional, Iterable, Dict
from urllib.parse import parse_qs

from pytube import request, YouTube, extract
from pytube import request, YouTube
from pytube.helpers import cache, deprecated
from pytube.mixins import install_proxy

Expand All @@ -34,6 +35,17 @@ def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
)
self.html = request.get(self.playlist_url)

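# NOTE: the pattern below matches the English "Last updated on" label and a
# three-letter month parsed with %b; needs testing with non-English pages.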
self.last_update: Optional[date] = None
results = re.search(
r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})<\/li>", self.html
)
if results:
month, day, year = results.groups()
self.last_update = datetime.strptime(
f"{month} {day:0>2} {year}", "%b %d %Y"
).date()

@staticmethod
def _find_load_more_url(req: str) -> Optional[str]:
"""Given an html page or a fragment thereof, looks for
Expand All @@ -48,11 +60,10 @@ def _find_load_more_url(req: str) -> Optional[str]:

return None

def parse_links(self) -> List[str]:
def parse_links(self, until_watch_id: Optional[str] = None) -> List[str]:
"""Parse the video links from the page source, extracts and
returns the /watch?v= part from video link href
"""

req = self.html

# split the page source by line and process each line
Expand All @@ -63,6 +74,12 @@ def parse_links(self) -> List[str]:
# Simulating a browser request for the load more link
load_more_url = self._find_load_more_url(req)
while load_more_url: # there is an url found
if until_watch_id:
try:
trim_index = link_list.index(f"/watch?v={until_watch_id}")
return link_list[:trim_index]
except ValueError:
pass
logger.debug("load more url: %s", load_more_url)
req = request.get(load_more_url)
load_more = json.loads(req)
Expand All @@ -86,12 +103,8 @@ def trimmed(self, video_id: str) -> List[str]:
:returns:
List of video URLs from the playlist trimmed at the given ID
"""
trimmed_urls = []
for url in self.video_urls:
if extract.video_id(url) == video_id:
break
trimmed_urls.append(url)
return trimmed_urls
trimmed_watch = self.parse_links(until_watch_id=video_id)
return [self._video_url(watch_path) for watch_path in trimmed_watch]

@property # type: ignore
@cache
Expand All @@ -101,9 +114,7 @@ def video_urls(self) -> List[str]:
:returns:
List of video URLs
"""
return [
"https://www.youtube.com" + watch_path for watch_path in self.parse_links()
]
return [self._video_url(watch_path) for watch_path in self.parse_links()]

@property
def videos(self) -> Iterable[YouTube]:
Expand Down Expand Up @@ -213,3 +224,7 @@ def title(self) -> Optional[str]:
.replace("- YouTube", "")
.strip()
)

@staticmethod
def _video_url(watch_path: str):
return f"https://www.youtube.com{watch_path}"
26 changes: 25 additions & 1 deletion tests/contrib/test_playlist.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import datetime
from unittest import mock
from unittest.mock import MagicMock

Expand Down Expand Up @@ -39,6 +40,14 @@ def test_init_with_watch_url(request_get):
)


@mock.patch("pytube.contrib.playlist.request.get")
def test_last_update(request_get, playlist_html):
    """Check that ``last_update`` is read from the fetched playlist page."""
    # The mocked request returns the fixture page for the playlist fetch.
    request_get.return_value = playlist_html
    assert Playlist("url").last_update == datetime.date(2019, 3, 7)


@mock.patch("pytube.contrib.playlist.request.get")
def test_init_with_watch_id(request_get):
request_get.return_value = ""
Expand Down Expand Up @@ -107,6 +116,20 @@ def test_videos(youtube, request_get, playlist_html):
assert len(list(playlist.videos)) == 12


@mock.patch("pytube.contrib.playlist.request.get")
@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
def test_load_more(youtube, request_get, playlist_html):
    """Check the video count when one "load more" page with no content follows."""
    # First response: the playlist page; second: a load-more payload whose
    # content and widget fields are both empty strings.
    empty_load_more = '{"content_html":"", "load_more_widget_html":""}'
    request_get.side_effect = [playlist_html, empty_load_more]

    playlist = Playlist("https://www.fakeurl.com/playlist?list=whatever")
    # Report a load-more URL once, then none, so the loop runs a single time.
    playlist._find_load_more_url = MagicMock(side_effect=["dummy", None])

    request_get.assert_called()
    assert len(list(playlist.videos)) == 12


@mock.patch("pytube.contrib.playlist.request.get")
@mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
def test_proxy(install_proxy, request_get):
Expand All @@ -121,7 +144,8 @@ def test_trimmed(request_get, playlist_html):
url = "https://www.fakeurl.com/playlist?list=whatever"
request_get.return_value = playlist_html
playlist = Playlist(url)
playlist._find_load_more_url = MagicMock(return_value=None)
playlist._find_load_more_url = MagicMock(return_value="dummy")
assert request_get.call_count == 1
assert playlist.trimmed("1BYu65vLKdA") == [
"https://www.youtube.com/watch?v=ujTCoH21GlA",
"https://www.youtube.com/watch?v=45ryDIPHdGg",
Expand Down

0 comments on commit 78f9f26

Please sign in to comment.