Ferdowsi
/

pytube

Model card Files Files and versions Community

hbmartin committed on Feb 2, 2020

Commit

78f9f26

unverified ·

2 Parent(s): 090fc4a c95bd40

Merge pull request #31 from hbmartin/playlist-addons

Browse files

Files changed (2) hide show

pytube/contrib/playlist.py +27 -12
tests/contrib/test_playlist.py +25 -1

pytube/contrib/playlist.py CHANGED Viewed

@@ -5,10 +5,11 @@ import json
 import logging
 import re
 from collections import OrderedDict
 from typing import List, Optional, Iterable, Dict
 from urllib.parse import parse_qs
-from pytube import request, YouTube, extract
 from pytube.helpers import cache, deprecated
 from pytube.mixins import install_proxy
@@ -34,6 +35,17 @@ class Playlist:
         )
         self.html = request.get(self.playlist_url)
     @staticmethod
     def _find_load_more_url(req: str) -> Optional[str]:
         """Given an html page or a fragment thereof, looks for
@@ -48,11 +60,10 @@ class Playlist:
         return None
-    def parse_links(self) -> List[str]:
         """Parse the video links from the page source, extracts and
         returns the /watch?v= part from video link href
         """
         req = self.html
         # split the page source by line and process each line
@@ -63,6 +74,12 @@ class Playlist:
         # Simulating a browser request for the load more link
         load_more_url = self._find_load_more_url(req)
         while load_more_url:  # there is an url found
             logger.debug("load more url: %s", load_more_url)
             req = request.get(load_more_url)
             load_more = json.loads(req)
@@ -86,12 +103,8 @@ class Playlist:
         :returns:
             List of video URLs from the playlist trimmed at the given ID
         """
-        trimmed_urls = []
-        for url in self.video_urls:
-            if extract.video_id(url) == video_id:
-                break
-            trimmed_urls.append(url)
-        return trimmed_urls
     @property  # type: ignore
     @cache
@@ -101,9 +114,7 @@ class Playlist:
         :returns:
             List of video URLs
         """
-        return [
-            "https://www.youtube.com" + watch_path for watch_path in self.parse_links()
-        ]
     @property
     def videos(self) -> Iterable[YouTube]:
@@ -213,3 +224,7 @@ class Playlist:
             .replace("- YouTube", "")
             .strip()
         )

 import logging
 import re
 from collections import OrderedDict
+from datetime import date, datetime
 from typing import List, Optional, Iterable, Dict
 from urllib.parse import parse_qs
+from pytube import request, YouTube
 from pytube.helpers import cache, deprecated
 from pytube.mixins import install_proxy
         )
         self.html = request.get(self.playlist_url)
+        # Needs testing with non-English
+        self.last_update: Optional[date] = None
+        results = re.search(
+            r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})<\/li>", self.html
+        )
+        if results:
+            month, day, year = results.groups()
+            self.last_update = datetime.strptime(
+                f"{month} {day:0>2} {year}", "%b %d %Y"
+            ).date()
     @staticmethod
     def _find_load_more_url(req: str) -> Optional[str]:
         """Given an html page or a fragment thereof, looks for
         return None
+    def parse_links(self, until_watch_id: Optional[str] = None) -> List[str]:
         """Parse the video links from the page source, extracts and
         returns the /watch?v= part from video link href
         """
         req = self.html
         # split the page source by line and process each line
         # Simulating a browser request for the load more link
         load_more_url = self._find_load_more_url(req)
         while load_more_url:  # there is an url found
+            if until_watch_id:
+                try:
+                    trim_index = link_list.index(f"/watch?v={until_watch_id}")
+                    return link_list[:trim_index]
+                except ValueError:
+                    pass
             logger.debug("load more url: %s", load_more_url)
             req = request.get(load_more_url)
             load_more = json.loads(req)
         :returns:
             List of video URLs from the playlist trimmed at the given ID
         """
+        trimmed_watch = self.parse_links(until_watch_id=video_id)
+        return [self._video_url(watch_path) for watch_path in trimmed_watch]
     @property  # type: ignore
     @cache
         :returns:
             List of video URLs
         """
+        return [self._video_url(watch_path) for watch_path in self.parse_links()]
     @property
     def videos(self) -> Iterable[YouTube]:
             .replace("- YouTube", "")
             .strip()
         )
+    @staticmethod
+    def _video_url(watch_path: str):
+        return f"https://www.youtube.com{watch_path}"

tests/contrib/test_playlist.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from unittest import mock
 from unittest.mock import MagicMock
@@ -39,6 +40,14 @@ def test_init_with_watch_url(request_get):
     )
 @mock.patch("pytube.contrib.playlist.request.get")
 def test_init_with_watch_id(request_get):
     request_get.return_value = ""
@@ -107,6 +116,20 @@ def test_videos(youtube, request_get, playlist_html):
     assert len(list(playlist.videos)) == 12
 @mock.patch("pytube.contrib.playlist.request.get")
 @mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
 def test_proxy(install_proxy, request_get):
@@ -121,7 +144,8 @@ def test_trimmed(request_get, playlist_html):
     url = "https://www.fakeurl.com/playlist?list=whatever"
     request_get.return_value = playlist_html
     playlist = Playlist(url)
-    playlist._find_load_more_url = MagicMock(return_value=None)
     assert playlist.trimmed("1BYu65vLKdA") == [
         "https://www.youtube.com/watch?v=ujTCoH21GlA",
         "https://www.youtube.com/watch?v=45ryDIPHdGg",

 # -*- coding: utf-8 -*-
+import datetime
 from unittest import mock
 from unittest.mock import MagicMock
     )
+@mock.patch("pytube.contrib.playlist.request.get")
+def test_last_update(request_get, playlist_html):
+    expected = datetime.date(2019, 3, 7)
+    request_get.return_value = playlist_html
+    playlist = Playlist("url")
+    assert playlist.last_update == expected
 @mock.patch("pytube.contrib.playlist.request.get")
 def test_init_with_watch_id(request_get):
     request_get.return_value = ""
     assert len(list(playlist.videos)) == 12
+@mock.patch("pytube.contrib.playlist.request.get")
+@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
+def test_load_more(youtube, request_get, playlist_html):
+    url = "https://www.fakeurl.com/playlist?list=whatever"
+    request_get.side_effect = [
+        playlist_html,
+        '{"content_html":"", "load_more_widget_html":""}',
+    ]
+    playlist = Playlist(url)
+    playlist._find_load_more_url = MagicMock(side_effect=["dummy", None])
+    request_get.assert_called()
+    assert len(list(playlist.videos)) == 12
 @mock.patch("pytube.contrib.playlist.request.get")
 @mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
 def test_proxy(install_proxy, request_get):
     url = "https://www.fakeurl.com/playlist?list=whatever"
     request_get.return_value = playlist_html
     playlist = Playlist(url)
+    playlist._find_load_more_url = MagicMock(return_value="dummy")
+    assert request_get.call_count == 1
     assert playlist.trimmed("1BYu65vLKdA") == [
         "https://www.youtube.com/watch?v=ujTCoH21GlA",
         "https://www.youtube.com/watch?v=45ryDIPHdGg",