Taylor Fox Dahlin committed on
Commit
af1eeee
·
unverified ·
1 Parent(s): 3d091d9

Improve metadata availability (#988)

Browse files

* Added channel id and channel url properties to YouTube object.

* Added some metadata to playlist object:
- owner
- owner_id
- owner_url
- description
- length
- views

pytube/__main__.py CHANGED
@@ -422,6 +422,20 @@ class YouTube:
422
  """
423
  return self.player_response.get('videoDetails', {}).get('keywords', [])
424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  @property
426
  def metadata(self) -> Optional[YouTubeMetadata]:
427
  """Get the metadata for the video.
 
422
  """
423
  return self.player_response.get('videoDetails', {}).get('keywords', [])
424
 
425
+ @property
426
+ def channel_id(self) -> str:
427
+ """Get the video poster's channel id.
428
+ :rtype: str
429
+ """
430
+ return self.player_response.get('videoDetails', {}).get('channelId', None)
431
+
432
+ @property
433
+ def channel_url(self) -> str:
434
+ """Construct the channel url for the video's poster from the channel id.
435
+ :rtype: str
436
+ """
437
+ return f'https://www.youtube.com/channel/{self.channel_id}'
438
+
439
  @property
440
  def metadata(self) -> Optional[YouTubeMetadata]:
441
  """Get the metadata for the video.
pytube/contrib/playlist.py CHANGED
@@ -1,13 +1,12 @@
1
  """Module to download a complete playlist from a youtube channel."""
2
  import json
3
  import logging
4
- import re
5
  from collections.abc import Sequence
6
  from datetime import date, datetime
7
  from typing import Dict, Iterable, List, Optional, Tuple, Union
8
 
9
  from pytube import extract, request, YouTube
10
- from pytube.helpers import cache, DeferredGeneratorList, install_proxy, regex_search, uniqueify
11
 
12
  logger = logging.getLogger(__name__)
13
 
@@ -24,6 +23,8 @@ class Playlist(Sequence):
24
  # These need to be initialized as None for the properties.
25
  self._html = None
26
  self._ytcfg = None
 
 
27
 
28
  self._playlist_id = None
29
 
@@ -52,6 +53,23 @@ class Playlist(Sequence):
52
  self._ytcfg = extract.get_ytcfg(self.html)
53
  return self._ytcfg
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  @property
56
  def yt_api_key(self):
57
  return self.ytcfg['INNERTUBE_API_KEY']
@@ -271,15 +289,20 @@ class Playlist(Sequence):
271
  @property
272
  @cache
273
  def last_updated(self) -> Optional[date]:
274
- date_match = re.search(
275
- r"Last updated on (\w{3}) (\d{1,2}), (\d{4})", self.html
276
- )
277
- if date_match:
278
- month, day, year = date_match.groups()
279
- return datetime.strptime(
280
- f"{month} {day:0>2} {year}", "%b %d %Y"
281
- ).date()
282
- return None
 
 
 
 
 
283
 
284
  @property
285
  @cache
@@ -289,8 +312,70 @@ class Playlist(Sequence):
289
  :return: playlist title (name)
290
  :rtype: Optional[str]
291
  """
292
- pattern = r"<title>(.+?)</title>"
293
- return regex_search(pattern, self.html, 1).replace("- YouTube", "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
  @staticmethod
296
  def _video_url(watch_path: str):
 
1
  """Module to download a complete playlist from a youtube channel."""
2
  import json
3
  import logging
 
4
  from collections.abc import Sequence
5
  from datetime import date, datetime
6
  from typing import Dict, Iterable, List, Optional, Tuple, Union
7
 
8
  from pytube import extract, request, YouTube
9
+ from pytube.helpers import cache, DeferredGeneratorList, install_proxy, uniqueify
10
 
11
  logger = logging.getLogger(__name__)
12
 
 
23
  # These need to be initialized as None for the properties.
24
  self._html = None
25
  self._ytcfg = None
26
+ self._initial_data = None
27
+ self._sidebar_info = None
28
 
29
  self._playlist_id = None
30
 
 
53
  self._ytcfg = extract.get_ytcfg(self.html)
54
  return self._ytcfg
55
 
56
+ @property
57
+ def initial_data(self):
58
+ if self._initial_data:
59
+ return self._initial_data
60
+ else:
61
+ self._initial_data = extract.initial_data(self.html)
62
+ return self._initial_data
63
+
64
+ @property
65
+ def sidebar_info(self):
66
+ if self._sidebar_info:
67
+ return self._sidebar_info
68
+ else:
69
+ self._sidebar_info = self.initial_data['sidebar'][
70
+ 'playlistSidebarRenderer']['items']
71
+ return self._sidebar_info
72
+
73
  @property
74
  def yt_api_key(self):
75
  return self.ytcfg['INNERTUBE_API_KEY']
 
289
  @property
290
  @cache
291
  def last_updated(self) -> Optional[date]:
292
+ """Extract the date that the playlist was last updated.
293
+
294
+ :return: Date of last playlist update
295
+ :rtype: datetime.date
296
+ """
297
+ last_updated_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
298
+ 'stats'][2]['runs'][1]['text']
299
+ date_components = last_updated_text.split()
300
+ month = date_components[0]
301
+ day = date_components[1].strip(',')
302
+ year = date_components[2]
303
+ return datetime.strptime(
304
+ f"{month} {day:0>2} {year}", "%b %d %Y"
305
+ ).date()
306
 
307
  @property
308
  @cache
 
312
  :return: playlist title (name)
313
  :rtype: Optional[str]
314
  """
315
+ return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
316
+ 'title']['runs'][0]['text']
317
+
318
+ @property
319
+ def description(self) -> str:
320
+ return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
321
+ 'description']['simpleText']
322
+
323
+ @property
324
+ def length(self):
325
+ """Extract the number of videos in the playlist.
326
+
327
+ :return: Playlist video count
328
+ :rtype: int
329
+ """
330
+ count_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
331
+ 'stats'][0]['runs'][0]['text']
332
+ return int(count_text)
333
+
334
+ @property
335
+ def views(self):
336
+ """Extract view count for playlist.
337
+
338
+ :return: Playlist view count
339
+ :rtype: int
340
+ """
341
+ # "1,234,567 views"
342
+ views_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
343
+ 'stats'][1]['simpleText']
344
+ # "1,234,567"
345
+ count_text = views_text.split()[0]
346
+ # "1234567"
347
+ count_text = count_text.replace(',', '')
348
+ return int(count_text)
349
+
350
+ @property
351
+ def owner(self):
352
+ """Extract the owner of the playlist.
353
+
354
+ :return: Playlist owner name.
355
+ :rtype: str
356
+ """
357
+ return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
358
+ 'videoOwner']['videoOwnerRenderer']['title']['runs'][0]['text']
359
+
360
+ @property
361
+ def owner_id(self):
362
+ """Extract the channel_id of the owner of the playlist.
363
+
364
+ :return: Playlist owner's channel ID.
365
+ :rtype: str
366
+ """
367
+ return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
368
+ 'videoOwner']['videoOwnerRenderer']['title']['runs'][0][
369
+ 'navigationEndpoint']['browseEndpoint']['browseId']
370
+
371
+ @property
372
+ def owner_url(self):
373
+ """Create the channel url of the owner of the playlist.
374
+
375
+ :return: Playlist owner's channel url.
376
+ :rtype: str
377
+ """
378
+ return f'https://www.youtube.com/channel/{self.owner_id}'
379
 
380
  @staticmethod
381
  def _video_url(watch_path: str):
tests/contrib/test_playlist.py CHANGED
@@ -5,11 +5,8 @@ from pytube import Playlist
5
 
6
 
7
  @mock.patch("pytube.request.get")
8
- def test_title(request_get):
9
- request_get.return_value = (
10
- "<title>(149) Python Tutorial for Beginners "
11
- "(For Absolute Beginners) - YouTube</title>"
12
- )
13
  url = (
14
  "https://www.fakeurl.com/playlist?list=PLS1QulWo1RIaJECMeUT4LFwJ"
15
  "-ghgoSH6n"
@@ -18,7 +15,7 @@ def test_title(request_get):
18
  pl_title = pl.title
19
  assert (
20
  pl_title
21
- == "(149) Python Tutorial for Beginners (For Absolute Beginners)"
22
  )
23
 
24
 
@@ -48,9 +45,9 @@ def test_init_with_watch_url(request_get):
48
 
49
 
50
  @mock.patch("pytube.request.get")
51
- def test_last_updated(request_get, playlist_html):
52
- expected = datetime.date(2020, 3, 11)
53
- request_get.return_value = playlist_html
54
  playlist = Playlist(
55
  "https://www.youtube.com/playlist?list"
56
  "=PLS1QulWo1RIaJECMeUT4LFwJ-ghgoSH6n"
@@ -251,8 +248,7 @@ def test_trimmed_pagination_not_found(
251
 
252
  # test case for playlist with submenus
253
  @mock.patch("pytube.request.get")
254
- def test_playlist_submenu(
255
- request_get, playlist_submenu_html):
256
  url = "https://www.fakeurl.com/playlist?list=whatever"
257
  request_get.side_effect = [
258
  playlist_submenu_html,
@@ -264,3 +260,63 @@ def test_playlist_submenu(
264
  ]
265
  playlist = Playlist(url)
266
  assert len(playlist.video_urls) == 12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
  @mock.patch("pytube.request.get")
8
+ def test_title(request_get, playlist_long_html):
9
+ request_get.return_value = playlist_long_html
 
 
 
10
  url = (
11
  "https://www.fakeurl.com/playlist?list=PLS1QulWo1RIaJECMeUT4LFwJ"
12
  "-ghgoSH6n"
 
15
  pl_title = pl.title
16
  assert (
17
  pl_title
18
+ == "Python Tutorial for Beginners (For Absolute Beginners)"
19
  )
20
 
21
 
 
45
 
46
 
47
  @mock.patch("pytube.request.get")
48
+ def test_last_updated(request_get, playlist_long_html):
49
+ expected = datetime.date(2020, 10, 8)
50
+ request_get.return_value = playlist_long_html
51
  playlist = Playlist(
52
  "https://www.youtube.com/playlist?list"
53
  "=PLS1QulWo1RIaJECMeUT4LFwJ-ghgoSH6n"
 
248
 
249
  # test case for playlist with submenus
250
  @mock.patch("pytube.request.get")
251
+ def test_playlist_submenu(request_get, playlist_submenu_html):
 
252
  url = "https://www.fakeurl.com/playlist?list=whatever"
253
  request_get.side_effect = [
254
  playlist_submenu_html,
 
260
  ]
261
  playlist = Playlist(url)
262
  assert len(playlist.video_urls) == 12
263
+
264
+
265
+ @mock.patch("pytube.request.get")
266
+ def test_playlist_length(request_get, playlist_long_html):
267
+ url = 'https://www.example.com/playlist?list=whatever'
268
+ request_get.return_value = playlist_long_html
269
+ p = Playlist(url)
270
+ assert p.length == 217
271
+
272
+
273
+ @mock.patch("pytube.request.get")
274
+ def test_playlist_description(request_get, playlist_long_html):
275
+ url = 'https://www.example.com/playlist?list=whatever'
276
+ request_get.return_value = playlist_long_html
277
+ p = Playlist(url)
278
+ assert p.description == (
279
+ 'Python Object Oriented - Learning Python in '
280
+ "simple and easy steps ,python,xml,script,install, A beginner's "
281
+ 'tutorial containing complete knowledge of Python Syntax Object '
282
+ 'Oriented Language, Methods, Tuples,Learn,Python,Tutorial,Interactive,'
283
+ 'Free, Tools/Utilities,Getting the most popular pages from your Apache'
284
+ ' logfile,Make your life easier with Virtualenvwrapper,This site now '
285
+ 'runs on Django,PythonForBeginners.com has a new owner,How to use '
286
+ 'Pillow, a fork of PIL,How to use the Python Imaging Library,Python '
287
+ 'Websites and Tutorials,How to use Envoy,Using Feedparser in Python,'
288
+ 'Subprocess and Shell Commands in Python, Exceptions Handling, '
289
+ 'Sockets, GUI, Extentions, XML Programming'
290
+ )
291
+
292
+
293
+ @mock.patch("pytube.request.get")
294
+ def test_playlist_views(request_get, playlist_long_html):
295
+ url = 'https://www.example.com/playlist?list=whatever'
296
+ request_get.return_value = playlist_long_html
297
+ p = Playlist(url)
298
+ assert p.views == 4617130
299
+
300
+
301
+ @mock.patch("pytube.request.get")
302
+ def test_playlist_owner(request_get, playlist_long_html):
303
+ url = 'https://www.example.com/playlist?list=whatever'
304
+ request_get.return_value = playlist_long_html
305
+ p = Playlist(url)
306
+ assert p.owner == 'ProgrammingKnowledge'
307
+
308
+
309
+ @mock.patch("pytube.request.get")
310
+ def test_playlist_owner_id(request_get, playlist_long_html):
311
+ url = 'https://www.example.com/playlist?list=whatever'
312
+ request_get.return_value = playlist_long_html
313
+ p = Playlist(url)
314
+ assert p.owner_id == 'UCs6nmQViDpUw0nuIx9c_WvA'
315
+
316
+
317
+ @mock.patch("pytube.request.get")
318
+ def test_playlist_owner_url(request_get, playlist_long_html):
319
+ url = 'https://www.example.com/playlist?list=whatever'
320
+ request_get.return_value = playlist_long_html
321
+ p = Playlist(url)
322
+ assert p.owner_url == 'https://www.youtube.com/channel/UCs6nmQViDpUw0nuIx9c_WvA'
tests/test_main.py CHANGED
@@ -7,14 +7,6 @@ from pytube import YouTube
7
  from pytube.exceptions import RegexMatchError
8
 
9
 
10
- @mock.patch("pytube.__main__.YouTube")
11
- def test_prefetch_deferred(youtube):
12
- instance = youtube.return_value
13
- instance.prefetch_descramble.return_value = None
14
- YouTube("https://www.youtube.com/watch?v=9bZkp7q19f0", True)
15
- assert not instance.prefetch_descramble.called
16
-
17
-
18
  @mock.patch("urllib.request.install_opener")
19
  def test_install_proxy(opener):
20
  proxies = {"http": "http://www.example.com:3128/"}
@@ -58,3 +50,11 @@ def test_js_caching(cipher_signature):
58
  assert pytube.__js_url__ is not None
59
  assert pytube.__js__ == cipher_signature.js
60
  assert pytube.__js_url__ == cipher_signature.js_url
 
 
 
 
 
 
 
 
 
7
  from pytube.exceptions import RegexMatchError
8
 
9
 
 
 
 
 
 
 
 
 
10
  @mock.patch("urllib.request.install_opener")
11
  def test_install_proxy(opener):
12
  proxies = {"http": "http://www.example.com:3128/"}
 
50
  assert pytube.__js_url__ is not None
51
  assert pytube.__js__ == cipher_signature.js
52
  assert pytube.__js_url__ == cipher_signature.js_url
53
+
54
+
55
+ def test_channel_id(cipher_signature):
56
+ assert cipher_signature.channel_id == 'UCBR8-60-B28hp2BmDPdntcQ'
57
+
58
+
59
+ def test_channel_url(cipher_signature):
60
+ assert cipher_signature.channel_url == 'https://www.youtube.com/channel/UCBR8-60-B28hp2BmDPdntcQ' # noqa:E501