Ferdowsi
/

pytube

Model card | Files and versions | Community

hbmartin committed on Feb 2, 2020

Commit

30b06f6

1 Parent(s): 5a6acf1

prevent unnecessary load more calls when trimming

Browse files

Files changed (2) hide show

pytube/contrib/playlist.py +21 -14
tests/contrib/test_playlist.py +2 -1

pytube/contrib/playlist.py CHANGED Viewed

@@ -9,7 +9,7 @@ from datetime import date, datetime
 from typing import List, Optional, Iterable, Dict
 from urllib.parse import parse_qs
-from pytube import request, YouTube, extract
 from pytube.helpers import cache, deprecated
 from pytube.mixins import install_proxy
@@ -37,10 +37,14 @@ class Playlist:
         # Needs testing with non-English
         self.last_update: Optional[date] = None
-        results = re.search(r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})<\/li>", self.html)
         if results:
             month, day, year = results.groups()
-            self.last_update = datetime.strptime(f"{month} {day:0>2} {year}", "%b %d %Y").date()
     @staticmethod
     def _find_load_more_url(req: str) -> Optional[str]:
@@ -56,11 +60,10 @@ class Playlist:
         return None
-    def parse_links(self) -> List[str]:
         """Parse the video links from the page source, extracts and
         returns the /watch?v= part from video link href
         """
         req = self.html
         # split the page source by line and process each line
@@ -71,6 +74,12 @@ class Playlist:
         # Simulating a browser request for the load more link
         load_more_url = self._find_load_more_url(req)
         while load_more_url:  # there is an url found
             logger.debug("load more url: %s", load_more_url)
             req = request.get(load_more_url)
             load_more = json.loads(req)
@@ -94,12 +103,8 @@ class Playlist:
         :returns:
             List of video URLs from the playlist trimmed at the given ID
         """
-        trimmed_urls = []
-        for url in self.video_urls:
-            if extract.video_id(url) == video_id:
-                break
-            trimmed_urls.append(url)
-        return trimmed_urls
     @property  # type: ignore
     @cache
@@ -109,9 +114,7 @@ class Playlist:
         :returns:
             List of video URLs
         """
-        return [
-            "https://www.youtube.com" + watch_path for watch_path in self.parse_links()
-        ]
     @property
     def videos(self) -> Iterable[YouTube]:
@@ -221,3 +224,7 @@ class Playlist:
             .replace("- YouTube", "")
             .strip()
         )

 from typing import List, Optional, Iterable, Dict
 from urllib.parse import parse_qs
+from pytube import request, YouTube
 from pytube.helpers import cache, deprecated
 from pytube.mixins import install_proxy
         # Needs testing with non-English
         self.last_update: Optional[date] = None
+        results = re.search(
+            r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})<\/li>", self.html
+        )
         if results:
             month, day, year = results.groups()
+            self.last_update = datetime.strptime(
+                f"{month} {day:0>2} {year}", "%b %d %Y"
+            ).date()
     @staticmethod
     def _find_load_more_url(req: str) -> Optional[str]:
         return None
+    def parse_links(self, until_watch_id: Optional[str] = None) -> List[str]:
         """Parse the video links from the page source, extracts and
         returns the /watch?v= part from video link href
         """
         req = self.html
         # split the page source by line and process each line
         # Simulating a browser request for the load more link
         load_more_url = self._find_load_more_url(req)
         while load_more_url:  # there is an url found
+            if until_watch_id:
+                try:
+                    trim_index = link_list.index(f"/watch?v={until_watch_id}")
+                    return link_list[:trim_index]
+                except ValueError:
+                    pass
             logger.debug("load more url: %s", load_more_url)
             req = request.get(load_more_url)
             load_more = json.loads(req)
         :returns:
             List of video URLs from the playlist trimmed at the given ID
         """
+        trimmed_watch = self.parse_links(until_watch_id=video_id)
+        return [self._video_url(watch_path) for watch_path in trimmed_watch]
     @property  # type: ignore
     @cache
         :returns:
             List of video URLs
         """
+        return [self._video_url(watch_path) for watch_path in self.parse_links()]
     @property
     def videos(self) -> Iterable[YouTube]:
             .replace("- YouTube", "")
             .strip()
         )
+    @staticmethod
+    def _video_url(watch_path: str):
+        return f"https://www.youtube.com{watch_path}"

tests/contrib/test_playlist.py CHANGED Viewed

@@ -130,7 +130,8 @@ def test_trimmed(request_get, playlist_html):
     url = "https://www.fakeurl.com/playlist?list=whatever"
     request_get.return_value = playlist_html
     playlist = Playlist(url)
-    playlist._find_load_more_url = MagicMock(return_value=None)
     assert playlist.trimmed("1BYu65vLKdA") == [
         "https://www.youtube.com/watch?v=ujTCoH21GlA",
         "https://www.youtube.com/watch?v=45ryDIPHdGg",

     url = "https://www.fakeurl.com/playlist?list=whatever"
     request_get.return_value = playlist_html
     playlist = Playlist(url)
+    playlist._find_load_more_url = MagicMock(return_value="dummy")
+    assert request_get.call_count == 1
     assert playlist.trimmed("1BYu65vLKdA") == [
         "https://www.youtube.com/watch?v=ujTCoH21GlA",
         "https://www.youtube.com/watch?v=45ryDIPHdGg",