Taylor Fox Dahlin committed on
Commit
181c88c
·
unverified ·
1 Parent(s): 257e6d2

[DRAFT] Feature/search (#1030)

Browse files

* Added search functionality.

* Added repr method to YouTube to make it more useful.

* Added some docstrings + comments for clarity.

docs/api.rst CHANGED
@@ -55,6 +55,13 @@ CaptionQuery Object
55
  :members:
56
  :inherited-members:
57
 
 
 
 
 
 
 
 
58
  Extract
59
  -------
60
 
 
55
  :members:
56
  :inherited-members:
57
 
58
+ Search Object
59
+ -------------
60
+
61
+ .. autoclass:: pytube.contrib.search.Search
62
+ :members:
63
+ :inherited-members:
64
+
65
  Extract
66
  -------
67
 
docs/index.rst CHANGED
@@ -59,6 +59,7 @@ of pytube.
59
  user/captions
60
  user/playlist
61
  user/channel
 
62
  user/cli
63
  user/exceptions
64
 
 
59
  user/captions
60
  user/playlist
61
  user/channel
62
+ user/search
63
  user/cli
64
  user/exceptions
65
 
docs/user/search.rst ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .. _search:
2
+
3
+ Using the search feature
4
+ ========================
5
+
6
+ Pytube includes functionality to search YouTube and return results almost
7
+ identical to those you would find using the search bar on YouTube's website.
8
+ The integration into pytube means that we can directly provide you with
9
+ YouTube objects that can be inspected and downloaded, instead of needing to do
10
+ additional processing.
11
+
12
+ Using the Search object is really easy::
13
+
14
+ >>> from pytube import Search
15
+ >>> s = Search('YouTube Rewind')
16
+ >>> len(s.results)
17
+ 17
18
+ >>> s.results
19
+ [\
20
+ <pytube.__main__.YouTube object: videoId=YbJOTdZBX1g>, \
21
+ <pytube.__main__.YouTube object: videoId=PKtnafFtfEo>, \
22
+ ...\
23
+ ]
24
+ >>>
25
+
26
+ Due to the potential for an endless stream of results, and in order to prevent
27
+ a user from accidentally entering an infinite loop of requesting additional
28
+ results, the ``.results`` attribute will only ever request the first set of
29
+ search results. Additional results can be explicitly requested by using the
30
+ ``.get_next_results()`` method, which will append any additional results to
31
+ the ``.results`` attribute::
32
+
33
+ >>> s.get_next_results()
34
+ >>> len(s.results)
35
+ 34
36
+ >>>
37
+
38
+ Additional functionality
39
+ ========================
40
+
41
+ In addition to the basic search functionality which returns YouTube objects,
42
+ searches also have associated autocomplete suggestions. These can be accessed
43
+ as follows::
44
+
45
+ >>> s.completion_suggestions
46
+ [\
47
+ 'can this video get 1 million dislikes', \
48
+ 'youtube rewind 2020 musical', \
49
+ ...\
50
+ ]
pytube/__init__.py CHANGED
@@ -16,3 +16,4 @@ from pytube.query import CaptionQuery, StreamQuery
16
  from pytube.__main__ import YouTube
17
  from pytube.contrib.playlist import Playlist
18
  from pytube.contrib.channel import Channel
 
 
16
  from pytube.__main__ import YouTube
17
  from pytube.contrib.playlist import Playlist
18
  from pytube.contrib.channel import Channel
19
+ from pytube.contrib.search import Search
pytube/__main__.py CHANGED
@@ -85,6 +85,9 @@ class YouTube:
85
  self._title = None
86
  self._publish_date = None
87
 
 
 
 
88
  @property
89
  def watch_html(self):
90
  if self._watch_html:
 
85
  self._title = None
86
  self._publish_date = None
87
 
88
def __repr__(self):
    """Return a short debugging representation that includes the video id."""
    return '<pytube.__main__.YouTube object: videoId={}>'.format(self.video_id)
90
+
91
  @property
92
  def watch_html(self):
93
  if self._watch_html:
pytube/contrib/search.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module for interacting with YouTube search."""
2
+ # Native python imports
3
+ import logging
4
+
5
+ # Local imports
6
+ from pytube import YouTube
7
+ from pytube.innertube import InnerTube
8
+
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class Search:
    """Search YouTube and expose the matching videos as YouTube objects.

    The first page of results is fetched lazily the first time ``results``
    (or ``completion_suggestions``) is accessed. Further pages are only
    fetched when ``get_next_results()`` is called explicitly.
    """

    # Renderer types that show up between search results but are not videos.
    _SKIPPED_RENDERERS = (
        'shelfRenderer',     # "recommended" shelves, e.g. "people also watched"
        'radioRenderer',     # auto-generated "mix" playlist results
        'playlistRenderer',  # playlist results
        'channelRenderer',   # channel results
    )

    def __init__(self, query):
        """Initialize Search object.

        :param str query:
            Search query provided by the user.
        """
        self.query = query
        self._innertube_client = InnerTube()

        # The first search, without a continuation, is structured differently
        # and contains completion suggestions, so we must store this separately
        self._initial_results = None

        # ``None`` means "not fetched yet"; an empty list is a valid, cached
        # answer meaning the query produced no videos.
        self._results = None
        self._completion_suggestions = None

        # Used for keeping track of query continuations so that new results
        # are always returned when get_next_results() is called
        self._current_continuation = None

    @property
    def completion_suggestions(self):
        """Return query autocompletion suggestions for the query.

        Triggers the initial search request if it has not been made yet.

        :rtype: list
        :returns:
            A list of autocomplete suggestions provided by YouTube for the
            query. Empty if the initial response carries no suggestions.
        """
        if self._completion_suggestions is None:
            if self._results is None:
                # Accessing the property runs (and caches) the initial query.
                self.results
            initial = self._initial_results or {}
            self._completion_suggestions = initial.get('refinements', [])
        return self._completion_suggestions

    @property
    def results(self):
        """Return search results.

        On first call, will generate and return the first set of results.
        Additional results can be generated using ``.get_next_results()``.

        :rtype: list
        :returns:
            A list of YouTube objects.
        """
        # Compare against None rather than truthiness so that an empty first
        # page is cached instead of being re-requested on every access.
        if self._results is None:
            videos, continuation = self.fetch_and_parse()
            self._results = videos or []
            self._current_continuation = continuation
        return self._results

    def get_next_results(self):
        """Use the stored continuation string to fetch the next set of results.

        This method does not return the results, but instead updates the
        results property. If no results have been fetched yet, the first set
        of results is loaded instead.

        :raises IndexError:
            If the previous response did not provide a continuation, i.e.
            there are no further results to fetch.
        """
        if self._results is None:
            # Nothing fetched so far: the "next" page is the first one.
            self.results
            return

        if not self._current_continuation:
            raise IndexError('No further search results are available.')

        videos, continuation = self.fetch_and_parse(self._current_continuation)
        self._results.extend(videos or [])
        self._current_continuation = continuation

    def fetch_and_parse(self, continuation=None):
        """Fetch from the innertube API and parse the results.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: tuple
        :returns:
            A tuple of a list of YouTube objects and a continuation string.
            The list is empty when the response contains no result section;
            the continuation is ``None`` when no further page is advertised.
        :raises KeyError:
            If the response matches neither the initial nor the continuation
            response layout.
        """
        # Begin by executing the query and identifying the relevant sections
        # of the results
        raw_results = self.fetch_query(continuation)

        # Initial result is handled by try block, continuations by except block
        try:
            sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
                'primaryContents']['sectionListRenderer']['contents']
        except KeyError:
            sections = raw_results['onResponseReceivedCommands'][0][
                'appendContinuationItemsAction']['continuationItems']

        item_renderer = None
        continuation_renderer = None
        for section in sections:
            if 'itemSectionRenderer' in section:
                item_renderer = section['itemSectionRenderer']
            if 'continuationItemRenderer' in section:
                continuation_renderer = section['continuationItemRenderer']

        # If the continuationItemRenderer doesn't exist, assume no further results
        if continuation_renderer:
            next_continuation = continuation_renderer['continuationEndpoint'][
                'continuationCommand']['token']
        else:
            next_continuation = None

        # If the itemSectionRenderer doesn't exist, assume no results.
        raw_video_list = item_renderer['contents'] if item_renderer else []

        videos = []
        for video_details in raw_video_list:
            if not self._is_video_result(video_details):
                continue

            # Extract relevant video information from the details.
            # Some of this can be used to pre-populate attributes of the
            # YouTube object.
            vid_metadata = self._extract_video_metadata(
                video_details['videoRenderer']
            )

            # Construct YouTube object from metadata and append to results
            vid = YouTube(vid_metadata['url'])
            vid.author = vid_metadata['channel_name']
            vid.title = vid_metadata['title']
            videos.append(vid)

        return videos, next_continuation

    def _is_video_result(self, video_details):
        """Tell whether a raw result item describes a plain video.

        Ads and the renderer types in ``_SKIPPED_RENDERERS`` are skipped
        silently; any other item without a ``videoRenderer`` is logged.
        """
        # Skip over ads
        if video_details.get('searchPyvRenderer', {}).get('ads', None):
            return False

        if any(name in video_details for name in self._SKIPPED_RENDERERS):
            return False

        if 'videoRenderer' not in video_details:
            logger.warning('Unexpected renderer encountered.')
            logger.warning('Renderer name: %s', video_details.keys())
            logger.warning('Search term: %s', self.query)
            logger.warning(
                'Please open an issue at '
                'https://github.com/pytube/pytube/issues '
                'and provide this log output.'
            )
            return False

        return True

    @staticmethod
    def _parse_view_count(vid_renderer):
        """Return the view count of a ``videoRenderer`` as an int (0 if unknown)."""
        # Livestreams have "runs", non-livestreams have "simpleText",
        # and scheduled releases do not have 'viewCountText'
        view_count = vid_renderer.get('viewCountText')
        if not view_count:
            return 0
        if 'runs' in view_count:
            view_count_text = view_count['runs'][0]['text']
        else:
            view_count_text = view_count['simpleText']
        # Strip ' views' text, then remove commas
        try:
            return int(view_count_text.split()[0].replace(',', ''))
        except (IndexError, ValueError):
            # Text without a leading number (presumably e.g. "No views")
            return 0

    @classmethod
    def _extract_video_metadata(cls, vid_renderer):
        """Collect the fields of interest from a ``videoRenderer`` dict."""
        vid_id = vid_renderer['videoId']
        owner = vid_renderer['ownerText']['runs'][0]
        if 'lengthText' in vid_renderer:
            vid_length = vid_renderer['lengthText']['simpleText']
        else:
            vid_length = None

        return {
            'id': vid_id,
            'url': f'https://www.youtube.com/watch?v={vid_id}',
            'title': vid_renderer['title']['runs'][0]['text'],
            'channel_name': owner['text'],
            'channel_url': owner['navigationEndpoint']['commandMetadata'][
                'webCommandMetadata']['url'],
            'view_count': cls._parse_view_count(vid_renderer),
            'length': vid_length,
        }

    def fetch_query(self, continuation=None):
        """Fetch raw results from the innertube API.

        The response of the first request made without a continuation is
        remembered, since only that one is used for completion suggestions.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: dict
        :returns:
            The raw json object returned by the innertube API.
        """
        query_results = self._innertube_client.search(self.query, continuation)
        # Never store a continuation response as the initial one; it has a
        # different layout.
        if not continuation and self._initial_results is None:
            self._initial_results = query_results
        return query_results  # noqa:R504
pytube/innertube.py CHANGED
@@ -103,7 +103,7 @@ class InnerTube:
103
  query.update(self.base_params)
104
  return self._call_api(endpoint, query, self.base_data)
105
 
106
- def search(self, search_query):
107
  """Make a request to the search endpoint.
108
 
109
  :param str search_query:
@@ -117,4 +117,8 @@ class InnerTube:
117
  'query': search_query
118
  }
119
  query.update(self.base_params)
120
- return self._call_api(endpoint, query, self.base_data)
 
 
 
 
 
103
  query.update(self.base_params)
104
  return self._call_api(endpoint, query, self.base_data)
105
 
106
+ def search(self, search_query, continuation=None):
107
  """Make a request to the search endpoint.
108
 
109
  :param str search_query:
 
117
  'query': search_query
118
  }
119
  query.update(self.base_params)
120
+ data = {}
121
+ if continuation:
122
+ data['continuation'] = continuation
123
+ data.update(self.base_data)
124
+ return self._call_api(endpoint, query, data)