Taylor Fox Dahlin
committed on
Improve metadata availability (#988)
Browse files
* Added channel id and channel url properties to YouTube object.
* Added some metadata to playlist object:
- owner
- owner_id
- owner_url
- description
- length
- views
- pytube/__main__.py +14 -0
- pytube/contrib/playlist.py +98 -13
- tests/contrib/test_playlist.py +67 -11
- tests/test_main.py +8 -8
pytube/__main__.py
CHANGED
@@ -422,6 +422,20 @@ class YouTube:
|
|
422 |
"""
|
423 |
return self.player_response.get('videoDetails', {}).get('keywords', [])
|
424 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
@property
|
426 |
def metadata(self) -> Optional[YouTubeMetadata]:
|
427 |
"""Get the metadata for the video.
|
|
|
422 |
"""
|
423 |
return self.player_response.get('videoDetails', {}).get('keywords', [])
|
424 |
|
425 |
+
@property
|
426 |
+
def channel_id(self) -> str:
|
427 |
+
"""Get the video poster's channel id.
|
428 |
+
:rtype: str
|
429 |
+
"""
|
430 |
+
return self.player_response.get('videoDetails', {}).get('channelId', None)
|
431 |
+
|
432 |
+
@property
|
433 |
+
def channel_url(self) -> str:
|
434 |
+
"""Construct the channel url for the video's poster from the channel id.
|
435 |
+
:rtype: str
|
436 |
+
"""
|
437 |
+
return f'https://www.youtube.com/channel/{self.channel_id}'
|
438 |
+
|
439 |
@property
|
440 |
def metadata(self) -> Optional[YouTubeMetadata]:
|
441 |
"""Get the metadata for the video.
|
pytube/contrib/playlist.py
CHANGED
@@ -1,13 +1,12 @@
|
|
1 |
"""Module to download a complete playlist from a youtube channel."""
|
2 |
import json
|
3 |
import logging
|
4 |
-
import re
|
5 |
from collections.abc import Sequence
|
6 |
from datetime import date, datetime
|
7 |
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
8 |
|
9 |
from pytube import extract, request, YouTube
|
10 |
-
from pytube.helpers import cache, DeferredGeneratorList, install_proxy,
|
11 |
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
@@ -24,6 +23,8 @@ class Playlist(Sequence):
|
|
24 |
# These need to be initialized as None for the properties.
|
25 |
self._html = None
|
26 |
self._ytcfg = None
|
|
|
|
|
27 |
|
28 |
self._playlist_id = None
|
29 |
|
@@ -52,6 +53,23 @@ class Playlist(Sequence):
|
|
52 |
self._ytcfg = extract.get_ytcfg(self.html)
|
53 |
return self._ytcfg
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
@property
|
56 |
def yt_api_key(self):
|
57 |
return self.ytcfg['INNERTUBE_API_KEY']
|
@@ -271,15 +289,20 @@ class Playlist(Sequence):
|
|
271 |
@property
|
272 |
@cache
|
273 |
def last_updated(self) -> Optional[date]:
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
|
|
|
|
|
|
|
|
|
|
283 |
|
284 |
@property
|
285 |
@cache
|
@@ -289,8 +312,70 @@ class Playlist(Sequence):
|
|
289 |
:return: playlist title (name)
|
290 |
:rtype: Optional[str]
|
291 |
"""
|
292 |
-
|
293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
294 |
|
295 |
@staticmethod
|
296 |
def _video_url(watch_path: str):
|
|
|
1 |
"""Module to download a complete playlist from a youtube channel."""
|
2 |
import json
|
3 |
import logging
|
|
|
4 |
from collections.abc import Sequence
|
5 |
from datetime import date, datetime
|
6 |
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
7 |
|
8 |
from pytube import extract, request, YouTube
|
9 |
+
from pytube.helpers import cache, DeferredGeneratorList, install_proxy, uniqueify
|
10 |
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
|
|
23 |
# These need to be initialized as None for the properties.
|
24 |
self._html = None
|
25 |
self._ytcfg = None
|
26 |
+
self._initial_data = None
|
27 |
+
self._sidebar_info = None
|
28 |
|
29 |
self._playlist_id = None
|
30 |
|
|
|
53 |
self._ytcfg = extract.get_ytcfg(self.html)
|
54 |
return self._ytcfg
|
55 |
|
56 |
+
@property
|
57 |
+
def initial_data(self):
|
58 |
+
if self._initial_data:
|
59 |
+
return self._initial_data
|
60 |
+
else:
|
61 |
+
self._initial_data = extract.initial_data(self.html)
|
62 |
+
return self._initial_data
|
63 |
+
|
64 |
+
@property
|
65 |
+
def sidebar_info(self):
|
66 |
+
if self._sidebar_info:
|
67 |
+
return self._sidebar_info
|
68 |
+
else:
|
69 |
+
self._sidebar_info = self.initial_data['sidebar'][
|
70 |
+
'playlistSidebarRenderer']['items']
|
71 |
+
return self._sidebar_info
|
72 |
+
|
73 |
@property
|
74 |
def yt_api_key(self):
|
75 |
return self.ytcfg['INNERTUBE_API_KEY']
|
|
|
289 |
@property
|
290 |
@cache
|
291 |
def last_updated(self) -> Optional[date]:
|
292 |
+
"""Extract the date that the playlist was last updated.
|
293 |
+
|
294 |
+
:return: Date of last playlist update
|
295 |
+
:rtype: datetime.date
|
296 |
+
"""
|
297 |
+
last_updated_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
298 |
+
'stats'][2]['runs'][1]['text']
|
299 |
+
date_components = last_updated_text.split()
|
300 |
+
month = date_components[0]
|
301 |
+
day = date_components[1].strip(',')
|
302 |
+
year = date_components[2]
|
303 |
+
return datetime.strptime(
|
304 |
+
f"{month} {day:0>2} {year}", "%b %d %Y"
|
305 |
+
).date()
|
306 |
|
307 |
@property
|
308 |
@cache
|
|
|
312 |
:return: playlist title (name)
|
313 |
:rtype: Optional[str]
|
314 |
"""
|
315 |
+
return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
316 |
+
'title']['runs'][0]['text']
|
317 |
+
|
318 |
+
@property
|
319 |
+
def description(self) -> str:
|
320 |
+
return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
321 |
+
'description']['simpleText']
|
322 |
+
|
323 |
+
@property
|
324 |
+
def length(self):
|
325 |
+
"""Extract the number of videos in the playlist.
|
326 |
+
|
327 |
+
:return: Playlist video count
|
328 |
+
:rtype: int
|
329 |
+
"""
|
330 |
+
count_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
331 |
+
'stats'][0]['runs'][0]['text']
|
332 |
+
return int(count_text)
|
333 |
+
|
334 |
+
@property
|
335 |
+
def views(self):
|
336 |
+
"""Extract view count for playlist.
|
337 |
+
|
338 |
+
:return: Playlist view count
|
339 |
+
:rtype: int
|
340 |
+
"""
|
341 |
+
# "1,234,567 views"
|
342 |
+
views_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
343 |
+
'stats'][1]['simpleText']
|
344 |
+
# "1,234,567"
|
345 |
+
count_text = views_text.split()[0]
|
346 |
+
# "1234567"
|
347 |
+
count_text = count_text.replace(',', '')
|
348 |
+
return int(count_text)
|
349 |
+
|
350 |
+
@property
|
351 |
+
def owner(self):
|
352 |
+
"""Extract the owner of the playlist.
|
353 |
+
|
354 |
+
:return: Playlist owner name.
|
355 |
+
:rtype: str
|
356 |
+
"""
|
357 |
+
return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
|
358 |
+
'videoOwner']['videoOwnerRenderer']['title']['runs'][0]['text']
|
359 |
+
|
360 |
+
@property
|
361 |
+
def owner_id(self):
|
362 |
+
"""Extract the channel_id of the owner of the playlist.
|
363 |
+
|
364 |
+
:return: Playlist owner's channel ID.
|
365 |
+
:rtype: str
|
366 |
+
"""
|
367 |
+
return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
|
368 |
+
'videoOwner']['videoOwnerRenderer']['title']['runs'][0][
|
369 |
+
'navigationEndpoint']['browseEndpoint']['browseId']
|
370 |
+
|
371 |
+
@property
|
372 |
+
def owner_url(self):
|
373 |
+
"""Create the channel url of the owner of the playlist.
|
374 |
+
|
375 |
+
:return: Playlist owner's channel url.
|
376 |
+
:rtype: str
|
377 |
+
"""
|
378 |
+
return f'https://www.youtube.com/channel/{self.owner_id}'
|
379 |
|
380 |
@staticmethod
|
381 |
def _video_url(watch_path: str):
|
tests/contrib/test_playlist.py
CHANGED
@@ -5,11 +5,8 @@ from pytube import Playlist
|
|
5 |
|
6 |
|
7 |
@mock.patch("pytube.request.get")
|
8 |
-
def test_title(request_get):
|
9 |
-
request_get.return_value =
|
10 |
-
"<title>(149) Python Tutorial for Beginners "
|
11 |
-
"(For Absolute Beginners) - YouTube</title>"
|
12 |
-
)
|
13 |
url = (
|
14 |
"https://www.fakeurl.com/playlist?list=PLS1QulWo1RIaJECMeUT4LFwJ"
|
15 |
"-ghgoSH6n"
|
@@ -18,7 +15,7 @@ def test_title(request_get):
|
|
18 |
pl_title = pl.title
|
19 |
assert (
|
20 |
pl_title
|
21 |
-
== "
|
22 |
)
|
23 |
|
24 |
|
@@ -48,9 +45,9 @@ def test_init_with_watch_url(request_get):
|
|
48 |
|
49 |
|
50 |
@mock.patch("pytube.request.get")
|
51 |
-
def test_last_updated(request_get,
|
52 |
-
expected = datetime.date(2020,
|
53 |
-
request_get.return_value =
|
54 |
playlist = Playlist(
|
55 |
"https://www.youtube.com/playlist?list"
|
56 |
"=PLS1QulWo1RIaJECMeUT4LFwJ-ghgoSH6n"
|
@@ -251,8 +248,7 @@ def test_trimmed_pagination_not_found(
|
|
251 |
|
252 |
# test case for playlist with submenus
|
253 |
@mock.patch("pytube.request.get")
|
254 |
-
def test_playlist_submenu(
|
255 |
-
request_get, playlist_submenu_html):
|
256 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
257 |
request_get.side_effect = [
|
258 |
playlist_submenu_html,
|
@@ -264,3 +260,63 @@ def test_playlist_submenu(
|
|
264 |
]
|
265 |
playlist = Playlist(url)
|
266 |
assert len(playlist.video_urls) == 12
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
@mock.patch("pytube.request.get")
|
8 |
+
def test_title(request_get, playlist_long_html):
|
9 |
+
request_get.return_value = playlist_long_html
|
|
|
|
|
|
|
10 |
url = (
|
11 |
"https://www.fakeurl.com/playlist?list=PLS1QulWo1RIaJECMeUT4LFwJ"
|
12 |
"-ghgoSH6n"
|
|
|
15 |
pl_title = pl.title
|
16 |
assert (
|
17 |
pl_title
|
18 |
+
== "Python Tutorial for Beginners (For Absolute Beginners)"
|
19 |
)
|
20 |
|
21 |
|
|
|
45 |
|
46 |
|
47 |
@mock.patch("pytube.request.get")
|
48 |
+
def test_last_updated(request_get, playlist_long_html):
|
49 |
+
expected = datetime.date(2020, 10, 8)
|
50 |
+
request_get.return_value = playlist_long_html
|
51 |
playlist = Playlist(
|
52 |
"https://www.youtube.com/playlist?list"
|
53 |
"=PLS1QulWo1RIaJECMeUT4LFwJ-ghgoSH6n"
|
|
|
248 |
|
249 |
# test case for playlist with submenus
|
250 |
@mock.patch("pytube.request.get")
|
251 |
+
def test_playlist_submenu(request_get, playlist_submenu_html):
|
|
|
252 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
253 |
request_get.side_effect = [
|
254 |
playlist_submenu_html,
|
|
|
260 |
]
|
261 |
playlist = Playlist(url)
|
262 |
assert len(playlist.video_urls) == 12
|
263 |
+
|
264 |
+
|
265 |
+
@mock.patch("pytube.request.get")
|
266 |
+
def test_playlist_length(request_get, playlist_long_html):
|
267 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
268 |
+
request_get.return_value = playlist_long_html
|
269 |
+
p = Playlist(url)
|
270 |
+
assert p.length == 217
|
271 |
+
|
272 |
+
|
273 |
+
@mock.patch("pytube.request.get")
|
274 |
+
def test_playlist_description(request_get, playlist_long_html):
|
275 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
276 |
+
request_get.return_value = playlist_long_html
|
277 |
+
p = Playlist(url)
|
278 |
+
assert p.description == (
|
279 |
+
'Python Object Oriented - Learning Python in '
|
280 |
+
"simple and easy steps ,python,xml,script,install, A beginner's "
|
281 |
+
'tutorial containing complete knowledge of Python Syntax Object '
|
282 |
+
'Oriented Language, Methods, Tuples,Learn,Python,Tutorial,Interactive,'
|
283 |
+
'Free, Tools/Utilities,Getting the most popular pages from your Apache'
|
284 |
+
' logfile,Make your life easier with Virtualenvwrapper,This site now '
|
285 |
+
'runs on Django,PythonForBeginners.com has a new owner,How to use '
|
286 |
+
'Pillow, a fork of PIL,How to use the Python Imaging Library,Python '
|
287 |
+
'Websites and Tutorials,How to use Envoy,Using Feedparser in Python,'
|
288 |
+
'Subprocess and Shell Commands in Python, Exceptions Handling, '
|
289 |
+
'Sockets, GUI, Extentions, XML Programming'
|
290 |
+
)
|
291 |
+
|
292 |
+
|
293 |
+
@mock.patch("pytube.request.get")
|
294 |
+
def test_playlist_views(request_get, playlist_long_html):
|
295 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
296 |
+
request_get.return_value = playlist_long_html
|
297 |
+
p = Playlist(url)
|
298 |
+
assert p.views == 4617130
|
299 |
+
|
300 |
+
|
301 |
+
@mock.patch("pytube.request.get")
|
302 |
+
def test_playlist_owner(request_get, playlist_long_html):
|
303 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
304 |
+
request_get.return_value = playlist_long_html
|
305 |
+
p = Playlist(url)
|
306 |
+
assert p.owner == 'ProgrammingKnowledge'
|
307 |
+
|
308 |
+
|
309 |
+
@mock.patch("pytube.request.get")
|
310 |
+
def test_playlist_owner_id(request_get, playlist_long_html):
|
311 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
312 |
+
request_get.return_value = playlist_long_html
|
313 |
+
p = Playlist(url)
|
314 |
+
assert p.owner_id == 'UCs6nmQViDpUw0nuIx9c_WvA'
|
315 |
+
|
316 |
+
|
317 |
+
@mock.patch("pytube.request.get")
|
318 |
+
def test_playlist_owner_url(request_get, playlist_long_html):
|
319 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
320 |
+
request_get.return_value = playlist_long_html
|
321 |
+
p = Playlist(url)
|
322 |
+
assert p.owner_url == 'https://www.youtube.com/channel/UCs6nmQViDpUw0nuIx9c_WvA'
|
tests/test_main.py
CHANGED
@@ -7,14 +7,6 @@ from pytube import YouTube
|
|
7 |
from pytube.exceptions import RegexMatchError
|
8 |
|
9 |
|
10 |
-
@mock.patch("pytube.__main__.YouTube")
|
11 |
-
def test_prefetch_deferred(youtube):
|
12 |
-
instance = youtube.return_value
|
13 |
-
instance.prefetch_descramble.return_value = None
|
14 |
-
YouTube("https://www.youtube.com/watch?v=9bZkp7q19f0", True)
|
15 |
-
assert not instance.prefetch_descramble.called
|
16 |
-
|
17 |
-
|
18 |
@mock.patch("urllib.request.install_opener")
|
19 |
def test_install_proxy(opener):
|
20 |
proxies = {"http": "http://www.example.com:3128/"}
|
@@ -58,3 +50,11 @@ def test_js_caching(cipher_signature):
|
|
58 |
assert pytube.__js_url__ is not None
|
59 |
assert pytube.__js__ == cipher_signature.js
|
60 |
assert pytube.__js_url__ == cipher_signature.js_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
from pytube.exceptions import RegexMatchError
|
8 |
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
@mock.patch("urllib.request.install_opener")
|
11 |
def test_install_proxy(opener):
|
12 |
proxies = {"http": "http://www.example.com:3128/"}
|
|
|
50 |
assert pytube.__js_url__ is not None
|
51 |
assert pytube.__js__ == cipher_signature.js
|
52 |
assert pytube.__js_url__ == cipher_signature.js_url
|
53 |
+
|
54 |
+
|
55 |
+
def test_channel_id(cipher_signature):
|
56 |
+
assert cipher_signature.channel_id == 'UCBR8-60-B28hp2BmDPdntcQ'
|
57 |
+
|
58 |
+
|
59 |
+
def test_channel_url(cipher_signature):
|
60 |
+
assert cipher_signature.channel_url == 'https://www.youtube.com/channel/UCBR8-60-B28hp2BmDPdntcQ' # noqa:E501
|