Taylor Fox Dahlin
committed on
Defer web requests until absolutely necessary (#953)
Browse files
* Defers web requests until they're actually necessary to fetch information
* Adjusts tests to reflect changes to how web requests are made
* Removes "defer_prefetch_init" arg from YouTube object initialization
* Converted most attributes into caching properties to facilitate request deferring.
* Added some documentation to uniqueify to explain its purpose.
* Prefetching added to conftest to improve test speed
* Added some setters for YouTube properties.
- pytube/__main__.py +191 -131
- pytube/helpers.py +9 -5
- tests/conftest.py +12 -1
- tests/test_exceptions.py +3 -3
- tests/test_helpers.py +6 -4
- tests/test_main.py +5 -22
- tests/test_streams.py +5 -4
pytube/__main__.py
CHANGED
@@ -44,7 +44,6 @@ class YouTube:
|
|
44 |
def __init__(
|
45 |
self,
|
46 |
url: str,
|
47 |
-
defer_prefetch_init: bool = False,
|
48 |
on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
|
49 |
on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
|
50 |
proxies: Dict[str, str] = None,
|
@@ -63,27 +62,27 @@ class YouTube:
|
|
63 |
complete events.
|
64 |
|
65 |
"""
|
66 |
-
self.
|
67 |
-
self.
|
68 |
|
69 |
# note: vid_info may eventually be removed. It sounds like it once had
|
70 |
# additional formats, but that doesn't appear to still be the case.
|
71 |
|
72 |
# the url to vid info, parsed from watch html
|
73 |
-
self.
|
74 |
-
self.
|
75 |
-
self.
|
76 |
-
|
77 |
-
self.
|
78 |
-
self.
|
79 |
-
self.
|
80 |
-
self.
|
81 |
# streams
|
82 |
-
self.
|
83 |
|
84 |
-
self.
|
85 |
|
86 |
-
self.
|
87 |
self._metadata: Optional[YouTubeMetadata] = None
|
88 |
|
89 |
# video_id part of /watch?v=<video_id>
|
@@ -100,60 +99,108 @@ class YouTube:
|
|
100 |
if proxies:
|
101 |
install_proxy(proxies)
|
102 |
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
111 |
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
115 |
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
raise MembersOnly(video_id=self.video_id)
|
125 |
-
elif reason == 'This live stream recording is not available.':
|
126 |
-
raise RecordingUnavailable(video_id=self.video_id)
|
127 |
-
else:
|
128 |
-
if reason == 'Video unavailable':
|
129 |
-
if extract.is_region_blocked(self.watch_html):
|
130 |
-
raise VideoRegionBlocked(video_id=self.video_id)
|
131 |
-
raise VideoUnavailable(video_id=self.video_id)
|
132 |
-
elif status == 'LOGIN_REQUIRED':
|
133 |
-
if reason == (
|
134 |
-
'This is a private video. '
|
135 |
-
'Please sign in to verify that you may see it.'
|
136 |
-
):
|
137 |
-
raise VideoPrivate(video_id=self.video_id)
|
138 |
-
elif status == 'ERROR':
|
139 |
-
if reason == 'Video unavailable':
|
140 |
-
raise VideoUnavailable(video_id=self.video_id)
|
141 |
|
142 |
-
|
143 |
-
|
|
|
|
|
144 |
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
-
|
|
|
|
|
|
|
151 |
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
# On pre-signed videos, we need to use get_ytplayer_config to fix
|
158 |
# the player_response item
|
159 |
if 'streamingData' not in self.player_config_args['player_response']:
|
@@ -163,6 +210,20 @@ class YouTube:
|
|
163 |
else:
|
164 |
self.player_config_args['player_response'] = config_response
|
165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
# https://github.com/nficano/pytube/issues/165
|
167 |
stream_maps = ["url_encoded_fmt_stream_map"]
|
168 |
if "adaptive_fmts" in self.player_config_args:
|
@@ -177,80 +238,60 @@ class YouTube:
|
|
177 |
apply_signature(self.player_config_args, fmt, self.js)
|
178 |
|
179 |
# build instances of :class:`Stream <Stream>`
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
-
# load the player_response object (contains subtitle information)
|
183 |
-
if isinstance(self.player_config_args["player_response"], str):
|
184 |
-
self.player_response = json.loads(
|
185 |
-
self.player_config_args["player_response"]
|
186 |
-
)
|
187 |
-
else:
|
188 |
-
self.player_response = self.player_config_args["player_response"]
|
189 |
-
del self.player_config_args["player_response"]
|
190 |
self.stream_monostate.title = self.title
|
191 |
self.stream_monostate.duration = self.length
|
192 |
|
193 |
-
|
194 |
-
"""Eagerly download all necessary data.
|
195 |
|
196 |
-
|
197 |
-
|
198 |
-
which blocks for long periods of time.
|
199 |
|
200 |
-
|
|
|
201 |
"""
|
202 |
-
|
203 |
-
self.check_availability()
|
204 |
-
self.age_restricted = extract.is_age_restricted(self.watch_html)
|
205 |
-
|
206 |
-
if self.age_restricted:
|
207 |
-
if not self.embed_html:
|
208 |
-
self.embed_html = request.get(url=self.embed_url)
|
209 |
-
self.vid_info_url = extract.video_info_url_age_restricted(
|
210 |
-
self.video_id, self.watch_url
|
211 |
-
)
|
212 |
-
self.js_url = extract.js_url(self.embed_html)
|
213 |
-
else:
|
214 |
-
self.vid_info_url = extract.video_info_url(
|
215 |
-
video_id=self.video_id, watch_url=self.watch_url
|
216 |
-
)
|
217 |
-
self.js_url = extract.js_url(self.watch_html)
|
218 |
-
|
219 |
-
self.initial_data = extract.initial_data(self.watch_html)
|
220 |
-
|
221 |
-
self.vid_info_raw = request.get(self.vid_info_url)
|
222 |
-
|
223 |
-
# If the js_url doesn't match the cached url, fetch the new js and update
|
224 |
-
# the cache; otherwise, load the cache.
|
225 |
-
if pytube.__js_url__ != self.js_url:
|
226 |
-
self.js = request.get(self.js_url)
|
227 |
-
pytube.__js__ = self.js
|
228 |
-
pytube.__js_url__ = self.js_url
|
229 |
-
else:
|
230 |
-
self.js = pytube.__js__
|
231 |
-
|
232 |
-
def initialize_stream_objects(self, fmt: str) -> None:
|
233 |
-
"""Convert manifest data to instances of :class:`Stream <Stream>`.
|
234 |
-
|
235 |
-
Take the unscrambled stream data and uses it to initialize
|
236 |
-
instances of :class:`Stream <Stream>` for each media stream.
|
237 |
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
|
243 |
-
|
|
|
|
|
244 |
|
|
|
245 |
"""
|
246 |
-
|
247 |
-
for stream in stream_manifest:
|
248 |
-
video = Stream(
|
249 |
-
stream=stream,
|
250 |
-
player_config_args=self.player_config_args,
|
251 |
-
monostate=self.stream_monostate,
|
252 |
-
)
|
253 |
-
self.fmt_streams.append(video)
|
254 |
|
255 |
@property
|
256 |
def caption_tracks(self) -> List[Caption]:
|
@@ -279,6 +320,7 @@ class YouTube:
|
|
279 |
|
280 |
:rtype: :class:`StreamQuery <StreamQuery>`.
|
281 |
"""
|
|
|
282 |
return StreamQuery(self.fmt_streams)
|
283 |
|
284 |
@property
|
@@ -286,7 +328,6 @@ class YouTube:
|
|
286 |
"""Get the thumbnail url image.
|
287 |
|
288 |
:rtype: str
|
289 |
-
|
290 |
"""
|
291 |
thumbnail_details = (
|
292 |
self.player_response.get("videoDetails", {})
|
@@ -304,25 +345,38 @@ class YouTube:
|
|
304 |
"""Get the publish date.
|
305 |
|
306 |
:rtype: datetime
|
307 |
-
|
308 |
"""
|
309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
|
311 |
@property
|
312 |
def title(self) -> str:
|
313 |
"""Get the video title.
|
314 |
|
315 |
:rtype: str
|
316 |
-
|
317 |
"""
|
318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
|
320 |
@property
|
321 |
def description(self) -> str:
|
322 |
"""Get the video description.
|
323 |
|
324 |
:rtype: str
|
325 |
-
|
326 |
"""
|
327 |
return self.player_response.get("videoDetails", {}).get("shortDescription")
|
328 |
|
@@ -340,7 +394,6 @@ class YouTube:
|
|
340 |
"""Get the video length in seconds.
|
341 |
|
342 |
:rtype: int
|
343 |
-
|
344 |
"""
|
345 |
return int(
|
346 |
self.player_config_args.get("length_seconds")
|
@@ -356,7 +409,6 @@ class YouTube:
|
|
356 |
"""Get the number of the times the video has been viewed.
|
357 |
|
358 |
:rtype: int
|
359 |
-
|
360 |
"""
|
361 |
return int(
|
362 |
self.player_response.get("videoDetails", {}).get("viewCount")
|
@@ -367,9 +419,17 @@ class YouTube:
|
|
367 |
"""Get the video author.
|
368 |
:rtype: str
|
369 |
"""
|
370 |
-
|
|
|
|
|
371 |
"author", "unknown"
|
372 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
|
374 |
@property
|
375 |
def keywords(self) -> List[str]:
|
|
|
44 |
def __init__(
|
45 |
self,
|
46 |
url: str,
|
|
|
47 |
on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
|
48 |
on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
|
49 |
proxies: Dict[str, str] = None,
|
|
|
62 |
complete events.
|
63 |
|
64 |
"""
|
65 |
+
self._js: Optional[str] = None # js fetched by js_url
|
66 |
+
self._js_url: Optional[str] = None # the url to the js, parsed from watch html
|
67 |
|
68 |
# note: vid_info may eventually be removed. It sounds like it once had
|
69 |
# additional formats, but that doesn't appear to still be the case.
|
70 |
|
71 |
# the url to vid info, parsed from watch html
|
72 |
+
self._vid_info_url: Optional[str] = None
|
73 |
+
self._vid_info_raw: Optional[str] = None # content fetched by vid_info_url
|
74 |
+
self._vid_info: Optional[Dict] = None # parsed content of vid_info_raw
|
75 |
+
|
76 |
+
self._watch_html: Optional[str] = None # the html of /watch?v=<video_id>
|
77 |
+
self._embed_html: Optional[str] = None
|
78 |
+
self._player_config_args: Optional[Dict] = None # inline js in the html containing
|
79 |
+
self._player_response: Optional[Dict] = None
|
80 |
# streams
|
81 |
+
self._age_restricted: Optional[bool] = None
|
82 |
|
83 |
+
self._fmt_streams: Optional[List[Stream]] = None
|
84 |
|
85 |
+
self._initial_data = None
|
86 |
self._metadata: Optional[YouTubeMetadata] = None
|
87 |
|
88 |
# video_id part of /watch?v=<video_id>
|
|
|
99 |
if proxies:
|
100 |
install_proxy(proxies)
|
101 |
|
102 |
+
self._author = None
|
103 |
+
self._title = None
|
104 |
+
self._publish_date = None
|
105 |
|
106 |
+
@property
|
107 |
+
def watch_html(self):
|
108 |
+
if self._watch_html:
|
109 |
+
return self._watch_html
|
110 |
+
self._watch_html = request.get(url=self.watch_url)
|
111 |
+
return self._watch_html
|
112 |
|
113 |
+
@property
|
114 |
+
def embed_html(self):
|
115 |
+
if self._embed_html:
|
116 |
+
return self._embed_html
|
117 |
+
self._embed_html = request.get(url=self.embed_url)
|
118 |
+
return self._embed_html
|
119 |
|
120 |
+
@property
|
121 |
+
def vid_info_raw(self):
|
122 |
+
if self._vid_info_raw:
|
123 |
+
return self._vid_info_raw
|
124 |
+
self._vid_info_raw = request.get(self.vid_info_url)
|
125 |
+
return self._vid_info_raw
|
126 |
|
127 |
+
@property
|
128 |
+
def age_restricted(self):
|
129 |
+
if self._age_restricted:
|
130 |
+
return self._age_restricted
|
131 |
+
self._age_restricted = extract.is_age_restricted(self.watch_html)
|
132 |
+
return self._age_restricted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
+
@property
|
135 |
+
def vid_info_url(self):
|
136 |
+
if self._vid_info_url:
|
137 |
+
return self._vid_info_url
|
138 |
|
139 |
+
if self.age_restricted:
|
140 |
+
self._vid_info_url = extract.video_info_url_age_restricted(
|
141 |
+
self.video_id, self.watch_url
|
142 |
+
)
|
143 |
+
else:
|
144 |
+
self._vid_info_url = extract.video_info_url(
|
145 |
+
video_id=self.video_id, watch_url=self.watch_url
|
146 |
+
)
|
147 |
+
return self._vid_info_url
|
148 |
|
149 |
+
@property
|
150 |
+
def js_url(self):
|
151 |
+
if self._js_url:
|
152 |
+
return self._js_url
|
153 |
|
154 |
+
if self.age_restricted:
|
155 |
+
self._js_url = extract.js_url(self.embed_html)
|
156 |
+
else:
|
157 |
+
self._js_url = extract.js_url(self.watch_html)
|
158 |
+
|
159 |
+
return self._js_url
|
160 |
+
|
161 |
+
@property
|
162 |
+
def js(self):
|
163 |
+
if self._js:
|
164 |
+
return self._js
|
165 |
+
|
166 |
+
# If the js_url doesn't match the cached url, fetch the new js and update
|
167 |
+
# the cache; otherwise, load the cache.
|
168 |
+
if pytube.__js_url__ != self.js_url:
|
169 |
+
self._js = request.get(self.js_url)
|
170 |
+
pytube.__js__ = self._js
|
171 |
+
pytube.__js_url__ = self.js_url
|
172 |
+
else:
|
173 |
+
self._js = pytube.__js__
|
174 |
+
|
175 |
+
return self._js
|
176 |
+
|
177 |
+
@property
|
178 |
+
def player_response(self):
|
179 |
+
"""The player response contains subtitle information and video details."""
|
180 |
+
if self._player_response:
|
181 |
+
return self._player_response
|
182 |
|
183 |
+
if isinstance(self.player_config_args["player_response"], str):
|
184 |
+
self._player_response = json.loads(
|
185 |
+
self.player_config_args["player_response"]
|
186 |
+
)
|
187 |
+
else:
|
188 |
+
self._player_response = self.player_config_args["player_response"]
|
189 |
+
return self._player_response
|
190 |
+
|
191 |
+
@property
|
192 |
+
def initial_data(self):
|
193 |
+
if self._initial_data:
|
194 |
+
return self._initial_data
|
195 |
+
self._initial_data = extract.initial_data(self.watch_html)
|
196 |
+
return self._initial_data
|
197 |
+
|
198 |
+
@property
|
199 |
+
def player_config_args(self):
|
200 |
+
if self._player_config_args:
|
201 |
+
return self._player_config_args
|
202 |
+
|
203 |
+
self._player_config_args = self.vid_info
|
204 |
# On pre-signed videos, we need to use get_ytplayer_config to fix
|
205 |
# the player_response item
|
206 |
if 'streamingData' not in self.player_config_args['player_response']:
|
|
|
210 |
else:
|
211 |
self.player_config_args['player_response'] = config_response
|
212 |
|
213 |
+
return self._player_config_args
|
214 |
+
|
215 |
+
@property
|
216 |
+
def fmt_streams(self):
|
217 |
+
"""Returns a list of streams if they have been initialized.
|
218 |
+
|
219 |
+
If the streams have not been initialized, finds all relevant
|
220 |
+
streams and initializes them.
|
221 |
+
"""
|
222 |
+
self.check_availability()
|
223 |
+
if self._fmt_streams:
|
224 |
+
return self._fmt_streams
|
225 |
+
|
226 |
+
self._fmt_streams = []
|
227 |
# https://github.com/nficano/pytube/issues/165
|
228 |
stream_maps = ["url_encoded_fmt_stream_map"]
|
229 |
if "adaptive_fmts" in self.player_config_args:
|
|
|
238 |
apply_signature(self.player_config_args, fmt, self.js)
|
239 |
|
240 |
# build instances of :class:`Stream <Stream>`
|
241 |
+
# Initialize stream objects
|
242 |
+
stream_manifest = self.player_config_args[fmt]
|
243 |
+
for stream in stream_manifest:
|
244 |
+
video = Stream(
|
245 |
+
stream=stream,
|
246 |
+
player_config_args=self.player_config_args,
|
247 |
+
monostate=self.stream_monostate,
|
248 |
+
)
|
249 |
+
self._fmt_streams.append(video)
|
250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
self.stream_monostate.title = self.title
|
252 |
self.stream_monostate.duration = self.length
|
253 |
|
254 |
+
return self._fmt_streams
|
|
|
255 |
|
256 |
+
def check_availability(self):
|
257 |
+
"""Check whether the video is available.
|
|
|
258 |
|
259 |
+
Raises different exceptions based on why the video is unavailable,
|
260 |
+
otherwise does nothing.
|
261 |
"""
|
262 |
+
status, messages = extract.playability_status(self.watch_html)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
|
264 |
+
for reason in messages:
|
265 |
+
if status == 'UNPLAYABLE':
|
266 |
+
if reason == (
|
267 |
+
'Join this channel to get access to members-only content '
|
268 |
+
'like this video, and other exclusive perks.'
|
269 |
+
):
|
270 |
+
raise MembersOnly(video_id=self.video_id)
|
271 |
+
elif reason == 'This live stream recording is not available.':
|
272 |
+
raise RecordingUnavailable(video_id=self.video_id)
|
273 |
+
else:
|
274 |
+
if reason == 'Video unavailable':
|
275 |
+
if extract.is_region_blocked(self.watch_html):
|
276 |
+
raise VideoRegionBlocked(video_id=self.video_id)
|
277 |
+
raise VideoUnavailable(video_id=self.video_id)
|
278 |
+
elif status == 'LOGIN_REQUIRED':
|
279 |
+
if reason == (
|
280 |
+
'This is a private video. '
|
281 |
+
'Please sign in to verify that you may see it.'
|
282 |
+
):
|
283 |
+
raise VideoPrivate(video_id=self.video_id)
|
284 |
+
elif status == 'ERROR':
|
285 |
+
if reason == 'Video unavailable':
|
286 |
+
raise VideoUnavailable(video_id=self.video_id)
|
287 |
|
288 |
+
@property
|
289 |
+
def vid_info(self):
|
290 |
+
"""Parse the raw vid info and return the parsed result.
|
291 |
|
292 |
+
:rtype: Dict[Any, Any]
|
293 |
"""
|
294 |
+
return dict(parse_qsl(self.vid_info_raw))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
|
296 |
@property
|
297 |
def caption_tracks(self) -> List[Caption]:
|
|
|
320 |
|
321 |
:rtype: :class:`StreamQuery <StreamQuery>`.
|
322 |
"""
|
323 |
+
self.check_availability()
|
324 |
return StreamQuery(self.fmt_streams)
|
325 |
|
326 |
@property
|
|
|
328 |
"""Get the thumbnail url image.
|
329 |
|
330 |
:rtype: str
|
|
|
331 |
"""
|
332 |
thumbnail_details = (
|
333 |
self.player_response.get("videoDetails", {})
|
|
|
345 |
"""Get the publish date.
|
346 |
|
347 |
:rtype: datetime
|
|
|
348 |
"""
|
349 |
+
if self._publish_date:
|
350 |
+
return self._publish_date
|
351 |
+
self._publish_date = extract.publish_date(self.watch_html)
|
352 |
+
return self._publish_date
|
353 |
+
|
354 |
+
@publish_date.setter
|
355 |
+
def publish_date(self, value):
|
356 |
+
"""Sets the publish date."""
|
357 |
+
self._publish_date = value
|
358 |
|
359 |
@property
|
360 |
def title(self) -> str:
|
361 |
"""Get the video title.
|
362 |
|
363 |
:rtype: str
|
|
|
364 |
"""
|
365 |
+
if self._title:
|
366 |
+
return self._title
|
367 |
+
self._title = self.player_response['videoDetails']['title']
|
368 |
+
return self._title
|
369 |
+
|
370 |
+
@title.setter
|
371 |
+
def title(self, value):
|
372 |
+
"""Sets the title value."""
|
373 |
+
self._title = value
|
374 |
|
375 |
@property
|
376 |
def description(self) -> str:
|
377 |
"""Get the video description.
|
378 |
|
379 |
:rtype: str
|
|
|
380 |
"""
|
381 |
return self.player_response.get("videoDetails", {}).get("shortDescription")
|
382 |
|
|
|
394 |
"""Get the video length in seconds.
|
395 |
|
396 |
:rtype: int
|
|
|
397 |
"""
|
398 |
return int(
|
399 |
self.player_config_args.get("length_seconds")
|
|
|
409 |
"""Get the number of the times the video has been viewed.
|
410 |
|
411 |
:rtype: int
|
|
|
412 |
"""
|
413 |
return int(
|
414 |
self.player_response.get("videoDetails", {}).get("viewCount")
|
|
|
419 |
"""Get the video author.
|
420 |
:rtype: str
|
421 |
"""
|
422 |
+
if self._author:
|
423 |
+
return self._author
|
424 |
+
self._author = self.player_response.get("videoDetails", {}).get(
|
425 |
"author", "unknown"
|
426 |
)
|
427 |
+
return self._author
|
428 |
+
|
429 |
+
@author.setter
|
430 |
+
def author(self, value):
|
431 |
+
"""Set the video author."""
|
432 |
+
self._author = value
|
433 |
|
434 |
@property
|
435 |
def keywords(self) -> List[str]:
|
pytube/helpers.py
CHANGED
@@ -173,6 +173,14 @@ def install_proxy(proxy_handler: Dict[str, str]) -> None:
|
|
173 |
|
174 |
|
175 |
def uniqueify(duped_list: List) -> List:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
seen: Dict[Any, bool] = {}
|
177 |
result = []
|
178 |
for item in duped_list:
|
@@ -205,11 +213,7 @@ def create_mock_html_json(vid_id) -> Dict[str, Any]:
|
|
205 |
pytube_mocks_path = os.path.join(pytube_dir_path, 'tests', 'mocks')
|
206 |
gzip_filepath = os.path.join(pytube_mocks_path, gzip_filename)
|
207 |
|
208 |
-
yt = YouTube(
|
209 |
-
'https://www.youtube.com/watch?v=%s' % vid_id,
|
210 |
-
defer_prefetch_init=True
|
211 |
-
)
|
212 |
-
yt.prefetch()
|
213 |
html_data = {
|
214 |
'url': yt.watch_url,
|
215 |
'js': yt.js,
|
|
|
173 |
|
174 |
|
175 |
def uniqueify(duped_list: List) -> List:
|
176 |
+
"""Remove duplicate items from a list, while maintaining list order.
|
177 |
+
|
178 |
+
:param List duped_list
|
179 |
+
List to remove duplicates from
|
180 |
+
|
181 |
+
:return List result
|
182 |
+
De-duplicated list
|
183 |
+
"""
|
184 |
seen: Dict[Any, bool] = {}
|
185 |
result = []
|
186 |
for item in duped_list:
|
|
|
213 |
pytube_mocks_path = os.path.join(pytube_dir_path, 'tests', 'mocks')
|
214 |
gzip_filepath = os.path.join(pytube_mocks_path, gzip_filename)
|
215 |
|
216 |
+
yt = YouTube(f'https://www.youtube.com/watch?v={vid_id}')
|
|
|
|
|
|
|
|
|
217 |
html_data = {
|
218 |
'url': yt.watch_url,
|
219 |
'js': yt.js,
|
tests/conftest.py
CHANGED
@@ -34,7 +34,18 @@ def load_and_init_from_playback_file(filename, mock_urlopen):
|
|
34 |
]
|
35 |
mock_urlopen.return_value = mock_url_open_object
|
36 |
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
|
40 |
@pytest.fixture
|
|
|
34 |
]
|
35 |
mock_urlopen.return_value = mock_url_open_object
|
36 |
|
37 |
+
# Pytest caches this result, so we can speed up the tests
|
38 |
+
# by causing the object to fetch all the relevant information
|
39 |
+
# it needs. Previously, this was handled by prefetch_init()
|
40 |
+
# and descramble(), but this functionality has since been
|
41 |
+
# deferred
|
42 |
+
v = YouTube(pb["url"])
|
43 |
+
v.watch_html
|
44 |
+
v.vid_info_raw
|
45 |
+
v.js
|
46 |
+
v.fmt_streams
|
47 |
+
v.player_response
|
48 |
+
return v
|
49 |
|
50 |
|
51 |
@pytest.fixture
|
tests/test_exceptions.py
CHANGED
@@ -79,7 +79,7 @@ def test_raises_video_private(private):
|
|
79 |
]
|
80 |
mock_url_open.return_value = mock_url_open_object
|
81 |
with pytest.raises(VideoPrivate):
|
82 |
-
YouTube('https://youtube.com/watch?v=m8uHb5jIGN8')
|
83 |
|
84 |
|
85 |
def test_raises_recording_unavailable(missing_recording):
|
@@ -91,7 +91,7 @@ def test_raises_recording_unavailable(missing_recording):
|
|
91 |
]
|
92 |
mock_url_open.return_value = mock_url_open_object
|
93 |
with pytest.raises(RecordingUnavailable):
|
94 |
-
YouTube('https://youtube.com/watch?v=5YceQ8YqYMc')
|
95 |
|
96 |
|
97 |
def test_raises_video_region_blocked(region_blocked):
|
@@ -103,4 +103,4 @@ def test_raises_video_region_blocked(region_blocked):
|
|
103 |
]
|
104 |
mock_url_open.return_value = mock_url_open_object
|
105 |
with pytest.raises(VideoRegionBlocked):
|
106 |
-
YouTube('https://youtube.com/watch?v=hZpzr8TbF08')
|
|
|
79 |
]
|
80 |
mock_url_open.return_value = mock_url_open_object
|
81 |
with pytest.raises(VideoPrivate):
|
82 |
+
YouTube('https://youtube.com/watch?v=m8uHb5jIGN8').streams
|
83 |
|
84 |
|
85 |
def test_raises_recording_unavailable(missing_recording):
|
|
|
91 |
]
|
92 |
mock_url_open.return_value = mock_url_open_object
|
93 |
with pytest.raises(RecordingUnavailable):
|
94 |
+
YouTube('https://youtube.com/watch?v=5YceQ8YqYMc').streams
|
95 |
|
96 |
|
97 |
def test_raises_video_region_blocked(region_blocked):
|
|
|
103 |
]
|
104 |
mock_url_open.return_value = mock_url_open_object
|
105 |
with pytest.raises(VideoRegionBlocked):
|
106 |
+
YouTube('https://youtube.com/watch?v=hZpzr8TbF08').streams
|
tests/test_helpers.py
CHANGED
@@ -117,14 +117,16 @@ def test_create_mock_html_json(mock_url_open, mock_open):
|
|
117 |
mock_url_open_object = mock.Mock()
|
118 |
|
119 |
# Order is:
|
120 |
-
# 1. watch_html -- must have
|
121 |
-
# 2.
|
122 |
-
# 3.
|
|
|
123 |
mock_url_open_object.read.side_effect = [
|
124 |
(b'yt.setConfig({"PLAYER_CONFIG":{"args":[]}});ytInitialData = {};ytInitialPlayerResponse = {};' # noqa: E501
|
125 |
b'"jsUrl":"/s/player/13371337/player_ias.vflset/en_US/base.js"'),
|
|
|
|
|
126 |
b'vid_info_raw',
|
127 |
-
b'js_result',
|
128 |
]
|
129 |
mock_url_open.return_value = mock_url_open_object
|
130 |
|
|
|
117 |
mock_url_open_object = mock.Mock()
|
118 |
|
119 |
# Order is:
|
120 |
+
# 1. watch_html -- must have jsurl match
|
121 |
+
# 2. embed html
|
122 |
+
# 3. watch html
|
123 |
+
# 4. raw vid info
|
124 |
mock_url_open_object.read.side_effect = [
|
125 |
(b'yt.setConfig({"PLAYER_CONFIG":{"args":[]}});ytInitialData = {};ytInitialPlayerResponse = {};' # noqa: E501
|
126 |
b'"jsUrl":"/s/player/13371337/player_ias.vflset/en_US/base.js"'),
|
127 |
+
b'embed_html',
|
128 |
+
b'watch_html',
|
129 |
b'vid_info_raw',
|
|
|
130 |
]
|
131 |
mock_url_open.return_value = mock_url_open_object
|
132 |
|
tests/test_main.py
CHANGED
@@ -5,7 +5,7 @@ import pytest
|
|
5 |
|
6 |
import pytube
|
7 |
from pytube import YouTube
|
8 |
-
from pytube.exceptions import
|
9 |
|
10 |
|
11 |
@mock.patch("pytube.__main__.YouTube")
|
@@ -21,7 +21,6 @@ def test_install_proxy(opener):
|
|
21 |
proxies = {"http": "http://www.example.com:3128/"}
|
22 |
YouTube(
|
23 |
"https://www.youtube.com/watch?v=9bZkp7q19f0",
|
24 |
-
defer_prefetch_init=True,
|
25 |
proxies=proxies,
|
26 |
)
|
27 |
opener.assert_called()
|
@@ -29,12 +28,10 @@ def test_install_proxy(opener):
|
|
29 |
|
30 |
@mock.patch("pytube.request.get")
|
31 |
def test_video_unavailable(get):
|
32 |
-
get.return_value =
|
33 |
-
youtube = YouTube(
|
34 |
-
|
35 |
-
|
36 |
-
with pytest.raises(VideoUnavailable):
|
37 |
-
youtube.prefetch()
|
38 |
|
39 |
|
40 |
def test_video_keywords(cipher_signature):
|
@@ -62,17 +59,3 @@ def test_js_caching(cipher_signature):
|
|
62 |
assert pytube.__js_url__ is not None
|
63 |
assert pytube.__js__ == cipher_signature.js
|
64 |
assert pytube.__js_url__ == cipher_signature.js_url
|
65 |
-
|
66 |
-
with mock.patch('pytube.request.urlopen') as mock_urlopen:
|
67 |
-
mock_urlopen_object = mock.Mock()
|
68 |
-
|
69 |
-
# We should never read the js from this
|
70 |
-
mock_urlopen_object.read.side_effect = [
|
71 |
-
cipher_signature.watch_html.encode('utf-8'),
|
72 |
-
cipher_signature.vid_info_raw.encode('utf-8'),
|
73 |
-
cipher_signature.js.encode('utf-8')
|
74 |
-
]
|
75 |
-
|
76 |
-
mock_urlopen.return_value = mock_urlopen_object
|
77 |
-
cipher_signature.prefetch()
|
78 |
-
assert mock_urlopen.call_count == 2
|
|
|
5 |
|
6 |
import pytube
|
7 |
from pytube import YouTube
|
8 |
+
from pytube.exceptions import RegexMatchError
|
9 |
|
10 |
|
11 |
@mock.patch("pytube.__main__.YouTube")
|
|
|
21 |
proxies = {"http": "http://www.example.com:3128/"}
|
22 |
YouTube(
|
23 |
"https://www.youtube.com/watch?v=9bZkp7q19f0",
|
|
|
24 |
proxies=proxies,
|
25 |
)
|
26 |
opener.assert_called()
|
|
|
28 |
|
29 |
@mock.patch("pytube.request.get")
|
30 |
def test_video_unavailable(get):
|
31 |
+
get.return_value = ""
|
32 |
+
youtube = YouTube("https://www.youtube.com/watch?v=9bZkp7q19f0")
|
33 |
+
with pytest.raises(RegexMatchError):
|
34 |
+
youtube.check_availability()
|
|
|
|
|
35 |
|
36 |
|
37 |
def test_video_keywords(cipher_signature):
|
|
|
59 |
assert pytube.__js_url__ is not None
|
60 |
assert pytube.__js__ == cipher_signature.js
|
61 |
assert pytube.__js_url__ == cipher_signature.js_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_streams.py
CHANGED
@@ -251,17 +251,18 @@ def test_on_complete_hook(cipher_signature):
|
|
251 |
|
252 |
def test_author(cipher_signature):
|
253 |
expected = "Test author"
|
254 |
-
cipher_signature.
|
255 |
assert cipher_signature.author == expected
|
256 |
|
257 |
expected = "unknown"
|
258 |
-
cipher_signature.
|
|
|
259 |
assert cipher_signature.author == expected
|
260 |
|
261 |
|
262 |
def test_thumbnail_when_in_details(cipher_signature):
|
263 |
expected = "some url"
|
264 |
-
cipher_signature.
|
265 |
"videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
|
266 |
}
|
267 |
assert cipher_signature.thumbnail_url == expected
|
@@ -269,7 +270,7 @@ def test_thumbnail_when_in_details(cipher_signature):
|
|
269 |
|
270 |
def test_thumbnail_when_not_in_details(cipher_signature):
|
271 |
expected = "https://img.youtube.com/vi/2lAe1cqCOXo/maxresdefault.jpg"
|
272 |
-
cipher_signature.
|
273 |
assert cipher_signature.thumbnail_url == expected
|
274 |
|
275 |
|
|
|
251 |
|
252 |
def test_author(cipher_signature):
|
253 |
expected = "Test author"
|
254 |
+
cipher_signature._player_response = {"videoDetails": {"author": expected}}
|
255 |
assert cipher_signature.author == expected
|
256 |
|
257 |
expected = "unknown"
|
258 |
+
cipher_signature.author = None
|
259 |
+
cipher_signature._player_response = {'key': 'value'}
|
260 |
assert cipher_signature.author == expected
|
261 |
|
262 |
|
263 |
def test_thumbnail_when_in_details(cipher_signature):
|
264 |
expected = "some url"
|
265 |
+
cipher_signature._player_response = {
|
266 |
"videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
|
267 |
}
|
268 |
assert cipher_signature.thumbnail_url == expected
|
|
|
270 |
|
271 |
def test_thumbnail_when_not_in_details(cipher_signature):
|
272 |
expected = "https://img.youtube.com/vi/2lAe1cqCOXo/maxresdefault.jpg"
|
273 |
+
cipher_signature._player_response = {'key': 'value'}
|
274 |
assert cipher_signature.thumbnail_url == expected
|
275 |
|
276 |
|