From 5a6acf17b9a4cb752c3442568188f2e01fb060bd Mon Sep 17 00:00:00 2001 From: Harold Martin Date: Sun, 2 Feb 2020 07:02:52 -0800 Subject: [PATCH 1/3] add last_update property to playlist --- pytube/contrib/playlist.py | 8 ++++++++ tests/contrib/test_playlist.py | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/pytube/contrib/playlist.py b/pytube/contrib/playlist.py index 231b9f1fc..e13346157 100644 --- a/pytube/contrib/playlist.py +++ b/pytube/contrib/playlist.py @@ -5,6 +5,7 @@ import logging import re from collections import OrderedDict +from datetime import date, datetime from typing import List, Optional, Iterable, Dict from urllib.parse import parse_qs @@ -34,6 +35,13 @@ def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None): ) self.html = request.get(self.playlist_url) + # Needs testing with non-English + self.last_update: Optional[date] = None + results = re.search(r"
  • Last updated on (\w{3}) (\d{1,2}), (\d{4})<\/li>", self.html) + if results: + month, day, year = results.groups() + self.last_update = datetime.strptime(f"{month} {day:0>2} {year}", "%b %d %Y").date() + @staticmethod def _find_load_more_url(req: str) -> Optional[str]: """Given an html page or a fragment thereof, looks for diff --git a/tests/contrib/test_playlist.py b/tests/contrib/test_playlist.py index 9213ce155..94857910f 100644 --- a/tests/contrib/test_playlist.py +++ b/tests/contrib/test_playlist.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import datetime from unittest import mock from unittest.mock import MagicMock @@ -39,6 +40,14 @@ def test_init_with_watch_url(request_get): ) +@mock.patch("pytube.contrib.playlist.request.get") +def test_last_update(request_get, playlist_html): + expected = datetime.date(2019, 3, 7) + request_get.return_value = playlist_html + playlist = Playlist("url") + assert playlist.last_update == expected + + @mock.patch("pytube.contrib.playlist.request.get") def test_init_with_watch_id(request_get): request_get.return_value = "" From 30b06f6f098defaf235997b4402fff2b6ded23d2 Mon Sep 17 00:00:00 2001 From: Harold Martin Date: Sun, 2 Feb 2020 07:43:58 -0800 Subject: [PATCH 2/3] prevent unnecessary load more calls when trimming --- pytube/contrib/playlist.py | 35 ++++++++++++++++++++-------------- tests/contrib/test_playlist.py | 3 ++- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/pytube/contrib/playlist.py b/pytube/contrib/playlist.py index e13346157..a2fc1ca65 100644 --- a/pytube/contrib/playlist.py +++ b/pytube/contrib/playlist.py @@ -9,7 +9,7 @@ from typing import List, Optional, Iterable, Dict from urllib.parse import parse_qs -from pytube import request, YouTube, extract +from pytube import request, YouTube from pytube.helpers import cache, deprecated from pytube.mixins import install_proxy @@ -37,10 +37,14 @@ def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None): # Needs testing with non-English self.last_update: Optional[date] = None - results = re.search(r"
  • Last updated on (\w{3}) (\d{1,2}), (\d{4})<\/li>", self.html) + results = re.search( + r"
  • Last updated on (\w{3}) (\d{1,2}), (\d{4})<\/li>", self.html + ) if results: month, day, year = results.groups() - self.last_update = datetime.strptime(f"{month} {day:0>2} {year}", "%b %d %Y").date() + self.last_update = datetime.strptime( + f"{month} {day:0>2} {year}", "%b %d %Y" + ).date() @staticmethod def _find_load_more_url(req: str) -> Optional[str]: @@ -56,11 +60,10 @@ def _find_load_more_url(req: str) -> Optional[str]: return None - def parse_links(self) -> List[str]: + def parse_links(self, until_watch_id: Optional[str] = None) -> List[str]: """Parse the video links from the page source, extracts and returns the /watch?v= part from video link href """ - req = self.html # split the page source by line and process each line @@ -71,6 +74,12 @@ def parse_links(self) -> List[str]: # Simulating a browser request for the load more link load_more_url = self._find_load_more_url(req) while load_more_url: # there is an url found + if until_watch_id: + try: + trim_index = link_list.index(f"/watch?v={until_watch_id}") + return link_list[:trim_index] + except ValueError: + pass logger.debug("load more url: %s", load_more_url) req = request.get(load_more_url) load_more = json.loads(req) @@ -94,12 +103,8 @@ def trimmed(self, video_id: str) -> List[str]: :returns: List of video URLs from the playlist trimmed at the given ID """ - trimmed_urls = [] - for url in self.video_urls: - if extract.video_id(url) == video_id: - break - trimmed_urls.append(url) - return trimmed_urls + trimmed_watch = self.parse_links(until_watch_id=video_id) + return [self._video_url(watch_path) for watch_path in trimmed_watch] @property # type: ignore @cache @@ -109,9 +114,7 @@ def video_urls(self) -> List[str]: :returns: List of video URLs """ - return [ - "https://www.youtube.com" + watch_path for watch_path in self.parse_links() - ] + return [self._video_url(watch_path) for watch_path in self.parse_links()] @property def videos(self) -> Iterable[YouTube]: @@ -221,3 +224,7 @@ def title(self) -> Optional[str]: .replace("- YouTube", "") .strip() ) + + @staticmethod + def _video_url(watch_path: str): + return f"https://www.youtube.com{watch_path}" diff --git a/tests/contrib/test_playlist.py b/tests/contrib/test_playlist.py index 94857910f..3df23ad57 100644 --- a/tests/contrib/test_playlist.py +++ b/tests/contrib/test_playlist.py @@ -130,7 +130,8 @@ def test_trimmed(request_get, playlist_html): url = "https://www.fakeurl.com/playlist?list=whatever" request_get.return_value = playlist_html playlist = Playlist(url) - playlist._find_load_more_url = MagicMock(return_value=None) + playlist._find_load_more_url = MagicMock(return_value="dummy") + assert request_get.call_count == 1 assert playlist.trimmed("1BYu65vLKdA") == [ "https://www.youtube.com/watch?v=ujTCoH21GlA", "https://www.youtube.com/watch?v=45ryDIPHdGg", From c95bd404ce15466cb524480340710d38d7a78c31 Mon Sep 17 00:00:00 2001 From: Harold Martin Date: Sun, 2 Feb 2020 07:56:10 -0800 Subject: [PATCH 3/3] add test_load_more --- tests/contrib/test_playlist.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/contrib/test_playlist.py b/tests/contrib/test_playlist.py index 3df23ad57..a51363057 100644 --- a/tests/contrib/test_playlist.py +++ b/tests/contrib/test_playlist.py @@ -116,6 +116,20 @@ def test_videos(youtube, request_get, playlist_html): assert len(list(playlist.videos)) == 12 +@mock.patch("pytube.contrib.playlist.request.get") +@mock.patch("pytube.cli.YouTube.__init__", return_value=None) +def test_load_more(youtube, request_get, playlist_html): + url = "https://www.fakeurl.com/playlist?list=whatever" + request_get.side_effect = [ + playlist_html, + '{"content_html":"", "load_more_widget_html":""}', + ] + playlist = Playlist(url) + playlist._find_load_more_url = MagicMock(side_effect=["dummy", None]) + request_get.assert_called() + assert len(list(playlist.videos)) == 12 + + @mock.patch("pytube.contrib.playlist.request.get") @mock.patch("pytube.contrib.playlist.install_proxy", return_value=None) def test_proxy(install_proxy, request_get):