hbmartin committed on
Commit
78f9f26
·
unverified ·
2 Parent(s): 090fc4a c95bd40

Merge pull request #31 from hbmartin/playlist-addons

Browse files
pytube/contrib/playlist.py CHANGED
@@ -5,10 +5,11 @@ import json
5
  import logging
6
  import re
7
  from collections import OrderedDict
 
8
  from typing import List, Optional, Iterable, Dict
9
  from urllib.parse import parse_qs
10
 
11
- from pytube import request, YouTube, extract
12
  from pytube.helpers import cache, deprecated
13
  from pytube.mixins import install_proxy
14
 
@@ -34,6 +35,17 @@ class Playlist:
34
  )
35
  self.html = request.get(self.playlist_url)
36
 
 
 
 
 
 
 
 
 
 
 
 
37
  @staticmethod
38
  def _find_load_more_url(req: str) -> Optional[str]:
39
  """Given an html page or a fragment thereof, looks for
@@ -48,11 +60,10 @@ class Playlist:
48
 
49
  return None
50
 
51
- def parse_links(self) -> List[str]:
52
  """Parse the video links from the page source, extracts and
53
  returns the /watch?v= part from video link href
54
  """
55
-
56
  req = self.html
57
 
58
  # split the page source by line and process each line
@@ -63,6 +74,12 @@ class Playlist:
63
  # Simulating a browser request for the load more link
64
  load_more_url = self._find_load_more_url(req)
65
  while load_more_url: # there is an url found
 
 
 
 
 
 
66
  logger.debug("load more url: %s", load_more_url)
67
  req = request.get(load_more_url)
68
  load_more = json.loads(req)
@@ -86,12 +103,8 @@ class Playlist:
86
  :returns:
87
  List of video URLs from the playlist trimmed at the given ID
88
  """
89
- trimmed_urls = []
90
- for url in self.video_urls:
91
- if extract.video_id(url) == video_id:
92
- break
93
- trimmed_urls.append(url)
94
- return trimmed_urls
95
 
96
  @property # type: ignore
97
  @cache
@@ -101,9 +114,7 @@ class Playlist:
101
  :returns:
102
  List of video URLs
103
  """
104
- return [
105
- "https://www.youtube.com" + watch_path for watch_path in self.parse_links()
106
- ]
107
 
108
  @property
109
  def videos(self) -> Iterable[YouTube]:
@@ -213,3 +224,7 @@ class Playlist:
213
  .replace("- YouTube", "")
214
  .strip()
215
  )
 
 
 
 
 
5
  import logging
6
  import re
7
  from collections import OrderedDict
8
+ from datetime import date, datetime
9
  from typing import List, Optional, Iterable, Dict
10
  from urllib.parse import parse_qs
11
 
12
+ from pytube import request, YouTube
13
  from pytube.helpers import cache, deprecated
14
  from pytube.mixins import install_proxy
15
 
 
35
  )
36
  self.html = request.get(self.playlist_url)
37
 
38
+ # Needs testing with non-English
39
+ self.last_update: Optional[date] = None
40
+ results = re.search(
41
+ r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})<\/li>", self.html
42
+ )
43
+ if results:
44
+ month, day, year = results.groups()
45
+ self.last_update = datetime.strptime(
46
+ f"{month} {day:0>2} {year}", "%b %d %Y"
47
+ ).date()
48
+
49
  @staticmethod
50
  def _find_load_more_url(req: str) -> Optional[str]:
51
  """Given an html page or a fragment thereof, looks for
 
60
 
61
  return None
62
 
63
+ def parse_links(self, until_watch_id: Optional[str] = None) -> List[str]:
64
  """Parse the video links from the page source, extracts and
65
  returns the /watch?v= part from video link href
66
  """
 
67
  req = self.html
68
 
69
  # split the page source by line and process each line
 
74
  # Simulating a browser request for the load more link
75
  load_more_url = self._find_load_more_url(req)
76
  while load_more_url: # there is an url found
77
+ if until_watch_id:
78
+ try:
79
+ trim_index = link_list.index(f"/watch?v={until_watch_id}")
80
+ return link_list[:trim_index]
81
+ except ValueError:
82
+ pass
83
  logger.debug("load more url: %s", load_more_url)
84
  req = request.get(load_more_url)
85
  load_more = json.loads(req)
 
103
  :returns:
104
  List of video URLs from the playlist trimmed at the given ID
105
  """
106
+ trimmed_watch = self.parse_links(until_watch_id=video_id)
107
+ return [self._video_url(watch_path) for watch_path in trimmed_watch]
 
 
 
 
108
 
109
  @property # type: ignore
110
  @cache
 
114
  :returns:
115
  List of video URLs
116
  """
117
+ return [self._video_url(watch_path) for watch_path in self.parse_links()]
 
 
118
 
119
  @property
120
  def videos(self) -> Iterable[YouTube]:
 
224
  .replace("- YouTube", "")
225
  .strip()
226
  )
227
+
228
+ @staticmethod
229
+ def _video_url(watch_path: str):
230
+ return f"https://www.youtube.com{watch_path}"
tests/contrib/test_playlist.py CHANGED
@@ -1,4 +1,5 @@
1
  # -*- coding: utf-8 -*-
 
2
  from unittest import mock
3
  from unittest.mock import MagicMock
4
 
@@ -39,6 +40,14 @@ def test_init_with_watch_url(request_get):
39
  )
40
 
41
 
 
 
 
 
 
 
 
 
42
  @mock.patch("pytube.contrib.playlist.request.get")
43
  def test_init_with_watch_id(request_get):
44
  request_get.return_value = ""
@@ -107,6 +116,20 @@ def test_videos(youtube, request_get, playlist_html):
107
  assert len(list(playlist.videos)) == 12
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  @mock.patch("pytube.contrib.playlist.request.get")
111
  @mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
112
  def test_proxy(install_proxy, request_get):
@@ -121,7 +144,8 @@ def test_trimmed(request_get, playlist_html):
121
  url = "https://www.fakeurl.com/playlist?list=whatever"
122
  request_get.return_value = playlist_html
123
  playlist = Playlist(url)
124
- playlist._find_load_more_url = MagicMock(return_value=None)
 
125
  assert playlist.trimmed("1BYu65vLKdA") == [
126
  "https://www.youtube.com/watch?v=ujTCoH21GlA",
127
  "https://www.youtube.com/watch?v=45ryDIPHdGg",
 
1
  # -*- coding: utf-8 -*-
2
+ import datetime
3
  from unittest import mock
4
  from unittest.mock import MagicMock
5
 
 
40
  )
41
 
42
 
43
+ @mock.patch("pytube.contrib.playlist.request.get")
44
+ def test_last_update(request_get, playlist_html):
45
+ expected = datetime.date(2019, 3, 7)
46
+ request_get.return_value = playlist_html
47
+ playlist = Playlist("url")
48
+ assert playlist.last_update == expected
49
+
50
+
51
  @mock.patch("pytube.contrib.playlist.request.get")
52
  def test_init_with_watch_id(request_get):
53
  request_get.return_value = ""
 
116
  assert len(list(playlist.videos)) == 12
117
 
118
 
119
+ @mock.patch("pytube.contrib.playlist.request.get")
120
+ @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
121
+ def test_load_more(youtube, request_get, playlist_html):
122
+ url = "https://www.fakeurl.com/playlist?list=whatever"
123
+ request_get.side_effect = [
124
+ playlist_html,
125
+ '{"content_html":"", "load_more_widget_html":""}',
126
+ ]
127
+ playlist = Playlist(url)
128
+ playlist._find_load_more_url = MagicMock(side_effect=["dummy", None])
129
+ request_get.assert_called()
130
+ assert len(list(playlist.videos)) == 12
131
+
132
+
133
  @mock.patch("pytube.contrib.playlist.request.get")
134
  @mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
135
  def test_proxy(install_proxy, request_get):
 
144
  url = "https://www.fakeurl.com/playlist?list=whatever"
145
  request_get.return_value = playlist_html
146
  playlist = Playlist(url)
147
+ playlist._find_load_more_url = MagicMock(return_value="dummy")
148
+ assert request_get.call_count == 1
149
  assert playlist.trimmed("1BYu65vLKdA") == [
150
  "https://www.youtube.com/watch?v=ujTCoH21GlA",
151
  "https://www.youtube.com/watch?v=45ryDIPHdGg",