Ferdowsi
/

pytube

Model card Files Files and versions Community

Taylor Fox Dahlin committed on Jul 6, 2021

Commit

181c88c

unverified ·

1 Parent(s): 257e6d2

[DRAFT] Feature/search (#1030)

* Added search functionality.

* Added repr method to YouTube to make it more useful.

* Added some docstrings + comments for clarity.

Files changed (7) hide show

docs/api.rst +7 -0
docs/index.rst +1 -0
docs/user/search.rst +50 -0
pytube/__init__.py +1 -0
pytube/__main__.py +3 -0
pytube/contrib/search.py +209 -0
pytube/innertube.py +6 -2

docs/api.rst CHANGED Viewed

@@ -55,6 +55,13 @@ CaptionQuery Object
    :members:
    :inherited-members:
 Extract
 -------

    :members:
    :inherited-members:
+Search Object
+-------------
+.. autoclass:: pytube.contrib.search.Search
+   :members:
+   :inherited-members:
 Extract
 -------

docs/index.rst CHANGED Viewed

@@ -59,6 +59,7 @@ of pytube.
    user/captions
    user/playlist
    user/channel
    user/cli
    user/exceptions

    user/captions
    user/playlist
    user/channel
+   user/search
    user/cli
    user/exceptions

docs/user/search.rst ADDED Viewed

	@@ -0,0 +1,50 @@

+.. _search:
+Using the search feature
+========================
+Pytube includes functionality to search YouTube and return results almost
+identical to those you would find using the search bar on YouTube's website.
+The integration into pytube means that we can directly provide you with
+YouTube objects that can be inspected and downloaded, instead of needing to do
+additional processing.
+Using the Search object is really easy::
+    >>> from pytube import Search
+    >>> s = Search('YouTube Rewind')
+    >>> len(s.results)
+    17
+    >>> s.results
+    [\
+        <pytube.__main__.YouTube object: videoId=YbJOTdZBX1g>, \
+        <pytube.__main__.YouTube object: videoId=PKtnafFtfEo>, \
+        ...\
+    ]
+    >>>
+Due to the potential for an endless stream of results, and in order to prevent
+a user from accidentally entering an infinite loop of requesting additional
+results, the ``.results`` attribute will only ever request the first set of
+search results. Additional results can be explicitly requested by using the
+``.get_next_results()`` method, which will append any additional results to
+the ``.results`` attribute::
+    >>> s.get_next_results()
+    >>> len(s.results)
+    34
+    >>>
+Additional functionality
+========================
+In addition to the basic search functionality which returns YouTube objects,
+searches also have associated autocomplete suggestions. These can be accessed
+as follows::
+    >>> s.completion_suggestions
+    [\
+        'can this video get 1 million dislikes', \
+        'youtube rewind 2020 musical', \
+        ...\
+    ]

pytube/__init__.py CHANGED Viewed

@@ -16,3 +16,4 @@ from pytube.query import CaptionQuery, StreamQuery
 from pytube.__main__ import YouTube
 from pytube.contrib.playlist import Playlist
 from pytube.contrib.channel import Channel

 from pytube.__main__ import YouTube
 from pytube.contrib.playlist import Playlist
 from pytube.contrib.channel import Channel
+from pytube.contrib.search import Search

pytube/__main__.py CHANGED Viewed

@@ -85,6 +85,9 @@ class YouTube:
         self._title = None
         self._publish_date = None
     @property
     def watch_html(self):
         if self._watch_html:

         self._title = None
         self._publish_date = None
+    def __repr__(self):
+        """Return a debug string that identifies this object by its video ID."""
+        return f'<pytube.__main__.YouTube object: videoId={self.video_id}>'
     @property
     def watch_html(self):
         if self._watch_html:

pytube/contrib/search.py ADDED Viewed

	@@ -0,0 +1,209 @@

+"""Module for interacting with YouTube search."""
+# Native python imports
+import logging
+# Local imports
+from pytube import YouTube
+from pytube.innertube import InnerTube
+logger = logging.getLogger(__name__)
+class Search:
+    """Run a YouTube search through the innertube API and expose the hits.
+    Results are fetched lazily: the first access to ``results`` performs the
+    initial query, and further pages are only requested through
+    ``get_next_results()``.
+    """
+    # Renderer types that show up among the search hits but are not plain
+    #  videos: "people also watched" / "popular X" shelves, auto-generated
+    #  mixes, playlists and channels. Entries of these types are skipped.
+    _SKIPPED_RENDERERS = (
+        'shelfRenderer',
+        'radioRenderer',
+        'playlistRenderer',
+        'channelRenderer',
+    )
+    def __init__(self, query):
+        """Initialize Search object.
+        :param str query:
+            Search query provided by the user.
+        """
+        self.query = query
+        self._innertube_client = InnerTube()
+        # The first search, without a continuation, is structured differently
+        #  and contains completion suggestions, so we must store this separately
+        self._initial_results = None
+        self._results = None
+        self._completion_suggestions = None
+        # Used for keeping track of query continuations so that new results
+        #  are always returned when get_next_results() is called
+        self._current_continuation = None
+    @property
+    def completion_suggestions(self):
+        """Return query autocompletion suggestions for the query.
+        :rtype: list
+        :returns:
+            A list of autocomplete suggestions provided by YouTube for the
+            query, or ``None`` when the search produced no results.
+        """
+        if self._completion_suggestions:
+            return self._completion_suggestions
+        # Reading ``results`` runs the initial query if it has not run yet,
+        #  which is what fills in ``_initial_results``.
+        if self.results:
+            # .get() so that a response without a 'refinements' entry yields
+            #  an empty list instead of raising KeyError.
+            self._completion_suggestions = self._initial_results.get(
+                'refinements', []
+            )
+        return self._completion_suggestions
+    @property
+    def results(self):
+        """Return search results.
+        On first call, will generate and return the first set of results.
+        Additional results can be generated using ``.get_next_results()``.
+        :rtype: list
+        :returns:
+            A list of YouTube objects; empty when the search has no results.
+        """
+        # Compare against None (not truthiness) so that an empty first page
+        #  is cached instead of being requested again on every access.
+        if self._results is None:
+            videos, continuation = self.fetch_and_parse()
+            # fetch_and_parse() reports "no item section" as None; store a
+            #  list so callers always receive the documented type.
+            self._results = videos if videos is not None else []
+            self._current_continuation = continuation
+        return self._results
+    def get_next_results(self):
+        """Use the stored continuation string to fetch the next set of results.
+        This method does not return the results, but instead updates the results property.
+        :raises IndexError:
+            If no continuation is stored, i.e. ``results`` has not been
+            accessed yet or the last fetched page offered no continuation.
+        """
+        if not self._current_continuation:
+            raise IndexError('No further search results are available.')
+        videos, continuation = self.fetch_and_parse(self._current_continuation)
+        if self._results is None:
+            self._results = []
+        # A page without an item section comes back as None; extending with
+        #  None would raise TypeError, so only extend with real results.
+        if videos:
+            self._results.extend(videos)
+        self._current_continuation = continuation
+    def fetch_and_parse(self, continuation=None):
+        """Fetch from the innertube API and parse the results.
+        :param str continuation:
+            Continuation string for fetching results.
+        :rtype: tuple
+        :returns:
+            A tuple of a list of YouTube objects and a continuation string.
+            The list is ``None`` when the response contains no item section,
+            and the continuation is ``None`` when the response contains no
+            continuation item.
+        """
+        # Begin by executing the query and identifying the relevant sections
+        #  of the results
+        raw_results = self.fetch_query(continuation)
+        # Initial result is handled by try block, continuations by except block
+        try:
+            sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
+                'primaryContents']['sectionListRenderer']['contents']
+        except KeyError:
+            sections = raw_results['onResponseReceivedCommands'][0][
+                'appendContinuationItemsAction']['continuationItems']
+        item_renderer = None
+        continuation_renderer = None
+        for s in sections:
+            if 'itemSectionRenderer' in s:
+                item_renderer = s['itemSectionRenderer']
+            if 'continuationItemRenderer' in s:
+                continuation_renderer = s['continuationItemRenderer']
+        # If the continuationItemRenderer doesn't exist, assume no further results
+        if continuation_renderer:
+            next_continuation = continuation_renderer['continuationEndpoint'][
+                'continuationCommand']['token']
+        else:
+            next_continuation = None
+        # If the itemSectionRenderer doesn't exist, assume no results.
+        if not item_renderer:
+            return None, next_continuation
+        videos = []
+        for video_details in item_renderer['contents']:
+            # Skip over ads
+            if video_details.get('searchPyvRenderer', {}).get('ads', None):
+                continue
+            # Skip non-video entries that break up the search results
+            if any(name in video_details for name in self._SKIPPED_RENDERERS):
+                continue
+            if 'videoRenderer' not in video_details:
+                # logger.warning() rather than the deprecated warn() alias
+                logger.warning('Unexpected renderer encountered.')
+                logger.warning('Renderer name: %s', video_details.keys())
+                logger.warning('Search term: %s', self.query)
+                logger.warning(
+                    'Please open an issue at '
+                    'https://github.com/pytube/pytube/issues '
+                    'and provide this log output.'
+                )
+                continue
+            videos.append(self._build_video(video_details['videoRenderer']))
+        return videos, next_continuation
+    @staticmethod
+    def _build_video(vid_renderer):
+        """Build a YouTube object from one ``videoRenderer`` search entry.
+        :param dict vid_renderer:
+            The value stored under 'videoRenderer' in a search result item.
+        :rtype: YouTube
+        :returns:
+            A YouTube object with ``author`` and ``title`` pre-populated
+            from the search entry.
+        """
+        # Extract relevant video information from the details.
+        # Some of this can be used to pre-populate attributes of the
+        #  YouTube object.
+        vid_id = vid_renderer['videoId']
+        owner_run = vid_renderer['ownerText']['runs'][0]
+        if 'lengthText' in vid_renderer:
+            vid_length = vid_renderer['lengthText']['simpleText']
+        else:
+            vid_length = None
+        vid_metadata = {
+            'id': vid_id,
+            'url': f'https://www.youtube.com/watch?v={vid_id}',
+            'title': vid_renderer['title']['runs'][0]['text'],
+            'channel_name': owner_run['text'],
+            'channel_url': owner_run['navigationEndpoint'][
+                'commandMetadata']['webCommandMetadata']['url'],
+            'view_count': Search._parse_view_count(vid_renderer),
+            'length': vid_length
+        }
+        # Construct YouTube object from metadata
+        vid = YouTube(vid_metadata['url'])
+        vid.author = vid_metadata['channel_name']
+        vid.title = vid_metadata['title']
+        return vid
+    @staticmethod
+    def _parse_view_count(vid_renderer):
+        """Extract the integer view count from a ``videoRenderer`` entry.
+        :param dict vid_renderer:
+            The value stored under 'videoRenderer' in a search result item.
+        :rtype: int
+        :returns:
+            The parsed view count, or 0 when the entry has no
+            'viewCountText' or its leading token is not a number.
+        """
+        # Livestreams have "runs", non-livestreams have "simpleText",
+        #  and scheduled releases do not have 'viewCountText'
+        if 'viewCountText' not in vid_renderer:
+            return 0
+        view_count_text = vid_renderer['viewCountText']
+        if 'runs' in view_count_text:
+            text = view_count_text['runs'][0]['text']
+        else:
+            text = view_count_text['simpleText']
+        # Strip ' views' text, then remove commas
+        tokens = text.split()
+        number = tokens[0].replace(',', '') if tokens else ''
+        # A non-numeric leading token (e.g. a label such as 'No views') is
+        #  reported as 0 instead of letting int() raise ValueError.
+        try:
+            return int(number)
+        except ValueError:
+            return 0
+    def fetch_query(self, continuation=None):
+        """Fetch raw results from the innertube API.
+        The first response received is also kept in ``_initial_results``.
+        :param str continuation:
+            Continuation string for fetching results.
+        :rtype: dict
+        :returns:
+            The raw json object returned by the innertube API.
+        """
+        query_results = self._innertube_client.search(self.query, continuation)
+        if not self._initial_results:
+            self._initial_results = query_results
+        return query_results  # noqa:R504

pytube/innertube.py CHANGED Viewed

@@ -103,7 +103,7 @@ class InnerTube:
         query.update(self.base_params)
         return self._call_api(endpoint, query, self.base_data)
-    def search(self, search_query):
         """Make a request to the search endpoint.
         :param str search_query:
@@ -117,4 +117,8 @@ class InnerTube:
             'query': search_query
         }
         query.update(self.base_params)
-        return self._call_api(endpoint, query, self.base_data)

         query.update(self.base_params)
         return self._call_api(endpoint, query, self.base_data)
+    def search(self, search_query, continuation=None):
         """Make a request to the search endpoint.
         :param str search_query:
             'query': search_query
         }
         query.update(self.base_params)
+        data = {}
+        if continuation:
+            data['continuation'] = continuation
+        data.update(self.base_data)
+        return self._call_api(endpoint, query, data)