Taylor Fox Dahlin
committed on
[DRAFT] Feature/search (#1030)
Browse files* Added search functionality.
* Added repr method to YouTube to make it more useful.
* Added some docstrings + comments for clarity.
- docs/api.rst +7 -0
- docs/index.rst +1 -0
- docs/user/search.rst +50 -0
- pytube/__init__.py +1 -0
- pytube/__main__.py +3 -0
- pytube/contrib/search.py +209 -0
- pytube/innertube.py +6 -2
docs/api.rst
CHANGED
@@ -55,6 +55,13 @@ CaptionQuery Object
|
|
55 |
:members:
|
56 |
:inherited-members:
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
Extract
|
59 |
-------
|
60 |
|
|
|
55 |
:members:
|
56 |
:inherited-members:
|
57 |
|
58 |
+
Search Object
|
59 |
+
-------------
|
60 |
+
|
61 |
+
.. autoclass:: pytube.contrib.search.Search
|
62 |
+
:members:
|
63 |
+
:inherited-members:
|
64 |
+
|
65 |
Extract
|
66 |
-------
|
67 |
|
docs/index.rst
CHANGED
@@ -59,6 +59,7 @@ of pytube.
|
|
59 |
user/captions
|
60 |
user/playlist
|
61 |
user/channel
|
|
|
62 |
user/cli
|
63 |
user/exceptions
|
64 |
|
|
|
59 |
user/captions
|
60 |
user/playlist
|
61 |
user/channel
|
62 |
+
user/search
|
63 |
user/cli
|
64 |
user/exceptions
|
65 |
|
docs/user/search.rst
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.. _search:
|
2 |
+
|
3 |
+
Using the search feature
|
4 |
+
========================
|
5 |
+
|
6 |
+
Pytube includes functionality to search YouTube and return results almost
|
7 |
+
identical to those you would find using the search bar on YouTube's website.
|
8 |
+
The integration into pytube means that we can directly provide you with
|
9 |
+
YouTube objects that can be inspected and downloaded, instead of needing to do
|
10 |
+
additional processing.
|
11 |
+
|
12 |
+
Using the Search object is really easy::
|
13 |
+
|
14 |
+
>>> from pytube import Search
|
15 |
+
>>> s = Search('YouTube Rewind')
|
16 |
+
>>> len(s.results)
|
17 |
+
17
|
18 |
+
>>> s.results
|
19 |
+
[\
|
20 |
+
<pytube.__main__.YouTube object: videoId=YbJOTdZBX1g>, \
|
21 |
+
<pytube.__main__.YouTube object: videoId=PKtnafFtfEo>, \
|
22 |
+
...\
|
23 |
+
]
|
24 |
+
>>>
|
25 |
+
|
26 |
+
Due to the potential for an endless stream of results, and in order to prevent
|
27 |
+
a user from accidentally entering an infinite loop of requesting additional
|
28 |
+
results, the ``.results`` attribute will only ever request the first set of
|
29 |
+
search results. Additional results can be explicitly requested by using the
|
30 |
+
``.get_next_results()`` method, which will append any additional results to
|
31 |
+
the ``.results`` attribute::
|
32 |
+
|
33 |
+
>>> s.get_next_results()
|
34 |
+
>>> len(s.results)
|
35 |
+
34
|
36 |
+
>>>
|
37 |
+
|
38 |
+
Additional functionality
|
39 |
+
========================
|
40 |
+
|
41 |
+
In addition to the basic search functionality which returns YouTube objects,
|
42 |
+
searches also have associated autocomplete suggestions. These can be accessed
|
43 |
+
as follows::
|
44 |
+
|
45 |
+
>>> s.completion_suggestions
|
46 |
+
[\
|
47 |
+
'can this video get 1 million dislikes', \
|
48 |
+
'youtube rewind 2020 musical', \
|
49 |
+
...\
|
50 |
+
]
|
pytube/__init__.py
CHANGED
@@ -16,3 +16,4 @@ from pytube.query import CaptionQuery, StreamQuery
|
|
16 |
from pytube.__main__ import YouTube
|
17 |
from pytube.contrib.playlist import Playlist
|
18 |
from pytube.contrib.channel import Channel
|
|
|
|
16 |
from pytube.__main__ import YouTube
|
17 |
from pytube.contrib.playlist import Playlist
|
18 |
from pytube.contrib.channel import Channel
|
19 |
+
from pytube.contrib.search import Search
|
pytube/__main__.py
CHANGED
@@ -85,6 +85,9 @@ class YouTube:
|
|
85 |
self._title = None
|
86 |
self._publish_date = None
|
87 |
|
|
|
|
|
|
|
88 |
@property
|
89 |
def watch_html(self):
|
90 |
if self._watch_html:
|
|
|
85 |
self._title = None
|
86 |
self._publish_date = None
|
87 |
|
88 |
+
def __repr__(self):
    """Return a short, human-readable representation of this object.

    The representation identifies the object by its ``video_id`` so that
    collections of objects are easy to tell apart when printed.

    :rtype: str
    :returns:
        A string of the form
        ``<pytube.__main__.YouTube object: videoId=...>``.
    """
    video_id = self.video_id
    return f'<pytube.__main__.YouTube object: videoId={video_id}>'
|
90 |
+
|
91 |
@property
|
92 |
def watch_html(self):
|
93 |
if self._watch_html:
|
pytube/contrib/search.py
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Module for interacting with YouTube search."""
|
2 |
+
# Native python imports
|
3 |
+
import logging
|
4 |
+
|
5 |
+
# Local imports
|
6 |
+
from pytube import YouTube
|
7 |
+
from pytube.innertube import InnerTube
|
8 |
+
|
9 |
+
|
10 |
+
logger = logging.getLogger(__name__)
|
11 |
+
|
12 |
+
|
13 |
+
class Search:
    """Run a YouTube search and expose the hits as ``YouTube`` objects.

    Only the first page of results is requested automatically (through the
    ``results`` property). Further pages must be requested explicitly with
    ``get_next_results()``, which appends them to ``results``.
    """

    # Renderer types that show up between search hits but are not plain
    # videos: "recommended" shelves (e.g. "people also watched"),
    # auto-generated "mix" playlists, playlists and channels.
    _SKIPPED_RENDERERS = (
        'shelfRenderer',
        'radioRenderer',
        'playlistRenderer',
        'channelRenderer',
    )

    def __init__(self, query):
        """Initialize Search object.

        :param str query:
            Search query provided by the user.
        """
        self.query = query
        self._innertube_client = InnerTube()

        # The first search, without a continuation, is structured differently
        # and contains completion suggestions, so we must store this separately
        self._initial_results = None

        self._results = None
        self._completion_suggestions = None

        # Used for keeping track of query continuations so that new results
        # are always returned when get_next_results() is called
        self._current_continuation = None

    @property
    def completion_suggestions(self):
        """Return query autocompletion suggestions for the query.

        Accessing this property triggers the initial search if it has not
        been performed yet.

        :rtype: list
        :returns:
            A list of autocomplete suggestions provided by YouTube for the
            query, or ``None`` if the initial response carried no
            ``refinements`` entry.
        """
        if self._completion_suggestions:
            return self._completion_suggestions

        # Reading ``results`` guarantees the initial (non-continuation)
        # request has been made; the suggestions live in that raw response,
        # independently of whether any videos were found.
        self.results  # noqa: B018
        if self._initial_results:
            self._completion_suggestions = self._initial_results.get(
                'refinements'
            )
        return self._completion_suggestions

    @property
    def results(self):
        """Return search results.

        On first call, will generate and return the first set of results.
        Additional results can be generated using ``.get_next_results()``.

        :rtype: list
        :returns:
            A list of YouTube objects.
        """
        # Compare against None rather than truthiness so that an empty first
        # page is cached instead of being re-fetched on every access.
        if self._results is not None:
            return self._results

        videos, continuation = self.fetch_and_parse()
        self._results = videos
        self._current_continuation = continuation
        return self._results

    def get_next_results(self):
        """Use the stored continuation string to fetch the next set of results.

        This method does not return the results, but instead updates the
        results property.

        :raises IndexError:
            If there is no stored continuation, i.e. the initial results
            have not been requested yet or no further pages exist.
        """
        if not self._current_continuation:
            raise IndexError('No further search results are available.')

        videos, continuation = self.fetch_and_parse(self._current_continuation)
        self._results.extend(videos)
        self._current_continuation = continuation

    def fetch_and_parse(self, continuation=None):
        """Fetch from the innertube API and parse the results.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: tuple
        :returns:
            A tuple of a list of YouTube objects and a continuation string.
            The list is empty when the response holds no result section and
            the continuation is ``None`` when no further page is advertised.
        """
        # Begin by executing the query and identifying the relevant sections
        # of the results
        raw_results = self.fetch_query(continuation)

        # Initial result is handled by try block, continuations by except block
        try:
            sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
                'primaryContents']['sectionListRenderer']['contents']
        except KeyError:
            sections = raw_results['onResponseReceivedCommands'][0][
                'appendContinuationItemsAction']['continuationItems']

        item_renderer = None
        continuation_renderer = None
        for section in sections:
            if 'itemSectionRenderer' in section:
                item_renderer = section['itemSectionRenderer']
            if 'continuationItemRenderer' in section:
                continuation_renderer = section['continuationItemRenderer']

        # If the continuationItemRenderer doesn't exist, assume no further results
        if continuation_renderer:
            next_continuation = continuation_renderer['continuationEndpoint'][
                'continuationCommand']['token']
        else:
            next_continuation = None

        # If the itemSectionRenderer doesn't exist, assume no results. An
        # empty list (not None) keeps ``results`` and ``get_next_results``
        # working on the return value without special-casing.
        if not item_renderer:
            return [], next_continuation

        videos = []
        for video_details in item_renderer['contents']:
            # Skip over ads
            if video_details.get('searchPyvRenderer', {}).get('ads', None):
                continue

            # Skip non-video entries that break up the search results
            if any(key in video_details for key in self._SKIPPED_RENDERERS):
                continue

            if 'videoRenderer' not in video_details:
                logger.warning('Unexpected renderer encountered.')
                logger.warning('Renderer name: %s', video_details.keys())
                logger.warning('Search term: %s', self.query)
                logger.warning(
                    'Please open an issue at '
                    'https://github.com/pytube/pytube/issues '
                    'and provide this log output.'
                )
                continue

            videos.append(
                self._video_from_renderer(video_details['videoRenderer'])
            )

        return videos, next_continuation

    @staticmethod
    def _parse_view_count(view_count_text):
        """Convert a view-count label such as ``'1,234 views'`` to an int.

        :param str view_count_text:
            The label text; its first whitespace-separated token is expected
            to be the count, optionally with comma separators.
        :rtype: int
        :returns:
            The parsed count, or ``0`` when the label is empty or its first
            token is not a number (e.g. a label like ``'No views'``).
        """
        tokens = view_count_text.split()
        if not tokens:
            return 0
        try:
            return int(tokens[0].replace(',', ''))
        except ValueError:
            return 0

    @staticmethod
    def _video_from_renderer(vid_renderer):
        """Build a YouTube object from a single ``videoRenderer`` dict.

        :param dict vid_renderer:
            The ``videoRenderer`` entry of one search hit.
        :rtype: YouTube
        :returns:
            A YouTube object with ``author`` and ``title`` pre-populated
            from the search response.
        """
        # Extract relevant video information from the details.
        # Some of this can be used to pre-populate attributes of the
        # YouTube object.
        vid_id = vid_renderer['videoId']
        vid_url = f'https://www.youtube.com/watch?v={vid_id}'
        vid_title = vid_renderer['title']['runs'][0]['text']
        owner_run = vid_renderer['ownerText']['runs'][0]
        vid_channel_name = owner_run['text']
        vid_channel_uri = owner_run['navigationEndpoint'][
            'commandMetadata']['webCommandMetadata']['url']

        # Livestreams have "runs", non-livestreams have "simpleText",
        # and scheduled releases do not have 'viewCountText'
        if 'viewCountText' in vid_renderer:
            view_count_info = vid_renderer['viewCountText']
            if 'runs' in view_count_info:
                vid_view_count_text = view_count_info['runs'][0]['text']
            else:
                vid_view_count_text = view_count_info['simpleText']
            vid_view_count = Search._parse_view_count(vid_view_count_text)
        else:
            vid_view_count = 0

        if 'lengthText' in vid_renderer:
            vid_length = vid_renderer['lengthText']['simpleText']
        else:
            vid_length = None

        # Only 'url', 'channel_name' and 'title' are consumed below; the
        # remaining fields are collected but currently unused.
        vid_metadata = {
            'id': vid_id,
            'url': vid_url,
            'title': vid_title,
            'channel_name': vid_channel_name,
            'channel_url': vid_channel_uri,
            'view_count': vid_view_count,
            'length': vid_length,
        }

        # Construct YouTube object from metadata
        vid = YouTube(vid_metadata['url'])
        vid.author = vid_metadata['channel_name']
        vid.title = vid_metadata['title']
        return vid

    def fetch_query(self, continuation=None):
        """Fetch raw results from the innertube API.

        The response of the first request made without a continuation is
        also remembered as the initial result, since it is the one that
        carries the completion suggestions.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: dict
        :returns:
            The raw json object returned by the innertube API.
        """
        query_results = self._innertube_client.search(self.query, continuation)
        # Never let a continuation page masquerade as the initial response.
        if continuation is None and self._initial_results is None:
            self._initial_results = query_results
        return query_results  # noqa:R504
|
pytube/innertube.py
CHANGED
@@ -103,7 +103,7 @@ class InnerTube:
|
|
103 |
query.update(self.base_params)
|
104 |
return self._call_api(endpoint, query, self.base_data)
|
105 |
|
106 |
-
def search(self, search_query):
|
107 |
"""Make a request to the search endpoint.
|
108 |
|
109 |
:param str search_query:
|
@@ -117,4 +117,8 @@ class InnerTube:
|
|
117 |
'query': search_query
|
118 |
}
|
119 |
query.update(self.base_params)
|
120 |
-
|
|
|
|
|
|
|
|
|
|
103 |
query.update(self.base_params)
|
104 |
return self._call_api(endpoint, query, self.base_data)
|
105 |
|
106 |
+
def search(self, search_query, continuation=None):
|
107 |
"""Make a request to the search endpoint.
|
108 |
|
109 |
:param str search_query:
|
|
|
117 |
'query': search_query
|
118 |
}
|
119 |
query.update(self.base_params)
|
120 |
+
data = {}
|
121 |
+
if continuation:
|
122 |
+
data['continuation'] = continuation
|
123 |
+
data.update(self.base_data)
|
124 |
+
return self._call_api(endpoint, query, data)
|