Ferdowsi
/

pytube

Model card Files Files and versions Community

hbmartin committed on Jan 23, 2020

Commit

a9b1e3b

1 Parent(s): edef73e

small cleanups

Browse files

Files changed (5) hide show

pytube/__main__.py +1 -1
pytube/captions.py +1 -1
pytube/contrib/playlist.py +11 -11
pytube/helpers.py +10 -1
pytube/streams.py +2 -6

pytube/__main__.py CHANGED Viewed

@@ -61,7 +61,7 @@ class YouTube:
         # the url to vid info, parsed from watch html
         self.vid_info_url: Optional[str] = None
-        self.vid_info_raw = None  # content fetched by vid_info_url
         self.vid_info: Optional[Dict] = None  # parsed content of vid_info_raw
         self.watch_html: Optional[str] = None  # the html of /watch?v=<video_id>

         # the url to vid info, parsed from watch html
         self.vid_info_url: Optional[str] = None
+        self.vid_info_raw: Optional[str] = None  # content fetched by vid_info_url
         self.vid_info: Optional[Dict] = None  # parsed content of vid_info_raw
         self.watch_html: Optional[str] = None  # the html of /watch?v=<video_id>

pytube/captions.py CHANGED Viewed

@@ -25,7 +25,7 @@ class Caption:
         self.code = caption_track["languageCode"]
     @property
-    def xml_captions(self):
         """Download the xml caption tracks."""
         return request.get(self.url)

         self.code = caption_track["languageCode"]
     @property
+    def xml_captions(self) -> str:
         """Download the xml caption tracks."""
         return request.get(self.url)

pytube/contrib/playlist.py CHANGED Viewed

@@ -40,19 +40,19 @@ class Playlist:
         return self.playlist_url
     @staticmethod
-    def _load_more_url(req):
         """Given an html page or a fragment thereof, looks for
         and returns the "load more" url if found.
         """
-        try:
-            load_more_url = "https://www.youtube.com" + re.search(
                 r"data-uix-load-more-href=\"(/browse_ajax\?"
                 'action_continuation=.*?)"',
                 req,
-            ).group(1)
-        except AttributeError:
-            load_more_url = ""
-        return load_more_url
     def parse_links(self) -> List[str]:
         """Parse the video links from the page source, extracts and
@@ -69,8 +69,8 @@ class Playlist:
         # The above only returns 100 or fewer links
         # Simulating a browser request for the load more link
-        load_more_url = self._load_more_url(req)
-        while len(load_more_url) > 0:  # there is an url found
             logger.debug("load more url: %s", load_more_url)
             req = request.get(load_more_url)
             load_more = json.loads(req)
@@ -79,11 +79,11 @@ class Playlist:
             )
             # remove duplicates
             link_list.extend(list(OrderedDict.fromkeys(videos)))
-            load_more_url = self._load_more_url(load_more["load_more_widget_html"],)
         return link_list
-    def populate_video_urls(self):
         """Construct complete links of all the videos in playlist and
         populate video_urls list

         return self.playlist_url
     @staticmethod
+    def _find_load_more_url(req: str) -> Optional[str]:
         """Given an html page or a fragment thereof, looks for
         and returns the "load more" url if found.
         """
+        match = re.search(
                 r"data-uix-load-more-href=\"(/browse_ajax\?"
                 'action_continuation=.*?)"',
                 req,
+            )
+        if match:
+            return "https://www.youtube.com" + match.group(1)
+        return None
     def parse_links(self) -> List[str]:
         """Parse the video links from the page source, extracts and
         # The above only returns 100 or fewer links
         # Simulating a browser request for the load more link
+        load_more_url = self._find_load_more_url(req)
+        while load_more_url:  # there is an url found
             logger.debug("load more url: %s", load_more_url)
             req = request.get(load_more_url)
             load_more = json.loads(req)
             )
             # remove duplicates
             link_list.extend(list(OrderedDict.fromkeys(videos)))
+            load_more_url = self._find_load_more_url(load_more["load_more_widget_html"], )
         return link_list
+    def populate_video_urls(self) -> None:
         """Construct complete links of all the videos in playlist and
         populate video_urls list

pytube/helpers.py CHANGED Viewed

@@ -1,9 +1,10 @@
 # -*- coding: utf-8 -*-
 """Various helper functions implemented by pytube."""
 import logging
 import pprint
 import re
 from pytube.exceptions import RegexMatchError
@@ -99,3 +100,11 @@ def create_logger(level: int = logging.ERROR) -> logging.Logger:
     logger.addHandler(handler)
     logger.setLevel(level)
     return logger

 # -*- coding: utf-8 -*-
 """Various helper functions implemented by pytube."""
+import functools
 import logging
 import pprint
 import re
+from typing import TypeVar, Callable
 from pytube.exceptions import RegexMatchError
     logger.addHandler(handler)
     logger.setLevel(level)
     return logger
+T = TypeVar('T')
+def cache(func: Callable[..., T]) -> T:
+    return functools.lru_cache()(func)  # type: ignore

pytube/streams.py CHANGED Viewed

@@ -118,9 +118,7 @@ class Stream:
         :rtype: bool
         """
-        if self.is_progressive:
-            return True
-        return self.type == "audio"
     @property
     def includes_video_track(self) -> bool:
@@ -128,9 +126,7 @@ class Stream:
         :rtype: bool
         """
-        if self.is_progressive:
-            return True
-        return self.type == "video"
     def parse_codecs(self) -> Tuple:
         """Get the video/audio codecs from list of codecs.

         :rtype: bool
         """
+        return self.is_progressive or self.type == "audio"
     @property
     def includes_video_track(self) -> bool:
         :rtype: bool
         """
+        return self.is_progressive or self.type == "video"
     def parse_codecs(self) -> Tuple:
         """Get the video/audio codecs from list of codecs.