experiment with wps
Browse files
- .flake8 +4 -2
- pytube/contrib/playlist.py +29 -43
.flake8
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
[flake8]
|
2 |
-
ignore = E231,E203,W503
|
3 |
-
max-line-length = 89
|
|
|
|
|
|
1 |
[flake8]
|
2 |
+
ignore = E231,E203,W503,Q000,WPS111,WPS305,WPS348,WPS602,D400,DAR201,S101,DAR101,C812,D104,I001,WPS306,WPS214,D401,WPS229,WPS420
|
3 |
+
max-line-length = 89
|
4 |
+
|
5 |
+
[isort]
|
pytube/contrib/playlist.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
|
|
|
3 |
|
4 |
import json
|
5 |
import logging
|
@@ -15,9 +16,7 @@ logger = logging.getLogger(__name__)
|
|
15 |
|
16 |
|
17 |
class Playlist:
|
18 |
-
"""
|
19 |
-
playlist
|
20 |
-
"""
|
21 |
|
22 |
def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
|
23 |
if proxies:
|
@@ -28,18 +27,16 @@ class Playlist:
|
|
28 |
except IndexError: # assume that url is just the id
|
29 |
self.playlist_id = url
|
30 |
|
31 |
-
self.playlist_url
|
32 |
-
"https://www.youtube.com/playlist?list=" + self.playlist_id
|
33 |
-
)
|
34 |
self.html = request.get(self.playlist_url)
|
35 |
|
36 |
# Needs testing with non-English
|
37 |
self.last_update: Optional[date] = None
|
38 |
-
|
39 |
r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html
|
40 |
)
|
41 |
-
if
|
42 |
-
month, day, year =
|
43 |
self.last_update = datetime.strptime(
|
44 |
f"{month} {day:0>2} {year}", "%b %d %Y"
|
45 |
).date()
|
@@ -48,15 +45,14 @@ class Playlist:
|
|
48 |
|
49 |
@staticmethod
|
50 |
def _find_load_more_url(req: str) -> Optional[str]:
|
51 |
-
"""Given an html page or
|
52 |
-
and returns the "load more" url if found.
|
53 |
"""
|
54 |
match = re.search(
|
55 |
r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
|
56 |
req,
|
57 |
)
|
58 |
if match:
|
59 |
-
return "https://www.youtube.com
|
60 |
|
61 |
return None
|
62 |
|
@@ -65,8 +61,7 @@ class Playlist:
|
|
65 |
return self.video_urls
|
66 |
|
67 |
def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:
|
68 |
-
"""Parse the video links from the page source,
|
69 |
-
returns the /watch?v= part from video link href
|
70 |
"""
|
71 |
req = self.html
|
72 |
videos_urls = self._extract_videos(req)
|
@@ -113,6 +108,7 @@ class Playlist:
|
|
113 |
|
114 |
def trimmed(self, video_id: str) -> Iterable[str]:
|
115 |
"""Retrieve a list of YouTube video URLs trimmed at the given video ID
|
|
|
116 |
i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns [1,2]
|
117 |
:type video_id: str
|
118 |
video ID to trim the returned list of playlist URLs at
|
@@ -121,16 +117,15 @@ class Playlist:
|
|
121 |
List of video URLs from the playlist trimmed at the given ID
|
122 |
"""
|
123 |
for page in self._paginate(until_watch_id=video_id):
|
124 |
-
for watch_path in page
|
125 |
-
yield self._video_url(watch_path)
|
126 |
|
127 |
@property # type: ignore
|
128 |
@cache
|
129 |
def video_urls(self) -> List[str]:
|
130 |
"""Complete links of all the videos in playlist
|
|
|
131 |
:rtype: List[str]
|
132 |
-
:returns:
|
133 |
-
List of video URLs
|
134 |
"""
|
135 |
return [
|
136 |
self._video_url(video) for page in list(self._paginate()) for video in page
|
@@ -138,29 +133,27 @@ class Playlist:
|
|
138 |
|
139 |
@property
|
140 |
def videos(self) -> Iterable[YouTube]:
|
141 |
-
"""
|
|
|
142 |
:rtype: Iterable[YouTube]
|
143 |
"""
|
144 |
-
for url in self.video_urls
|
145 |
-
yield YouTube(url)
|
146 |
|
147 |
@deprecated(
|
148 |
"This call is unnecessary, you can directly access .video_urls or .videos"
|
149 |
)
|
150 |
def populate_video_urls(self) -> List[str]:
|
151 |
"""Complete links of all the videos in playlist
|
|
|
152 |
:rtype: List[str]
|
153 |
-
:returns:
|
154 |
-
List of video URLs
|
155 |
"""
|
156 |
-
|
157 |
return self.video_urls
|
158 |
|
159 |
@deprecated("This function will be removed in the future.")
|
160 |
def _path_num_prefix_generator(self, reverse=False): # pragma: no cover
|
161 |
-
"""
|
162 |
-
|
163 |
-
in the playlist.
|
164 |
If the number of digits required to name a file is less than that
|
165 |
required to name the last file, it prepends 0s.
|
166 |
So if you have a playlist of 100 videos it will number them like:
|
@@ -185,9 +178,7 @@ class Playlist:
|
|
185 |
reverse_numbering: bool = False,
|
186 |
resolution: str = "720p",
|
187 |
) -> None: # pragma: no cover
|
188 |
-
"""Download all the videos in the the playlist.
|
189 |
-
resolution is 720p (or highest available), later more option
|
190 |
-
should be added to download resolution of choice
|
191 |
|
192 |
:param download_path:
|
193 |
(optional) Output path for the playlist If one is not
|
@@ -206,7 +197,6 @@ class Playlist:
|
|
206 |
Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
|
207 |
:type resolution: str
|
208 |
"""
|
209 |
-
|
210 |
logger.debug("total videos found: %d", len(self.video_urls))
|
211 |
logger.debug("starting download")
|
212 |
|
@@ -231,22 +221,18 @@ class Playlist:
|
|
231 |
|
232 |
@cache
|
233 |
def title(self) -> Optional[str]:
|
234 |
-
"""
|
235 |
-
|
236 |
-
|
237 |
-
|
|
|
|
|
238 |
match = pattern.search(self.html)
|
239 |
|
240 |
if match is None:
|
241 |
return None
|
242 |
|
243 |
-
return (
|
244 |
-
match.group()
|
245 |
-
.replace(open_tag, "")
|
246 |
-
.replace(end_tag, "")
|
247 |
-
.replace("- YouTube", "")
|
248 |
-
.strip()
|
249 |
-
)
|
250 |
|
251 |
@staticmethod
|
252 |
def _video_url(watch_path: str):
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
+
"""Module to download a complete playlist from a youtube channel."""
|
4 |
|
5 |
import json
|
6 |
import logging
|
|
|
16 |
|
17 |
|
18 |
class Playlist:
|
19 |
+
"""Load a YouTube playlist with URL or ID"""
|
|
|
|
|
20 |
|
21 |
def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
|
22 |
if proxies:
|
|
|
27 |
except IndexError: # assume that url is just the id
|
28 |
self.playlist_id = url
|
29 |
|
30 |
+
self.playlist_url = f"https://www.youtube.com/playlist?list={self.playlist_id}"
|
|
|
|
|
31 |
self.html = request.get(self.playlist_url)
|
32 |
|
33 |
# Needs testing with non-English
|
34 |
self.last_update: Optional[date] = None
|
35 |
+
date_match = re.search(
|
36 |
r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html
|
37 |
)
|
38 |
+
if date_match:
|
39 |
+
month, day, year = date_match.groups()
|
40 |
self.last_update = datetime.strptime(
|
41 |
f"{month} {day:0>2} {year}", "%b %d %Y"
|
42 |
).date()
|
|
|
45 |
|
46 |
@staticmethod
|
47 |
def _find_load_more_url(req: str) -> Optional[str]:
|
48 |
+
"""Given an html page or fragment, returns the "load more" url if found.
|
|
|
49 |
"""
|
50 |
match = re.search(
|
51 |
r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
|
52 |
req,
|
53 |
)
|
54 |
if match:
|
55 |
+
return f"https://www.youtube.com{match.group(1)}"
|
56 |
|
57 |
return None
|
58 |
|
|
|
61 |
return self.video_urls
|
62 |
|
63 |
def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:
|
64 |
+
"""Parse the video links from the page source, yields the /watch?v= part from video link
|
|
|
65 |
"""
|
66 |
req = self.html
|
67 |
videos_urls = self._extract_videos(req)
|
|
|
108 |
|
109 |
def trimmed(self, video_id: str) -> Iterable[str]:
|
110 |
"""Retrieve a list of YouTube video URLs trimmed at the given video ID
|
111 |
+
|
112 |
i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns [1,2]
|
113 |
:type video_id: str
|
114 |
video ID to trim the returned list of playlist URLs at
|
|
|
117 |
List of video URLs from the playlist trimmed at the given ID
|
118 |
"""
|
119 |
for page in self._paginate(until_watch_id=video_id):
|
120 |
+
yield from (self._video_url(watch_path) for watch_path in page)
|
|
|
121 |
|
122 |
@property # type: ignore
|
123 |
@cache
|
124 |
def video_urls(self) -> List[str]:
|
125 |
"""Complete links of all the videos in playlist
|
126 |
+
|
127 |
:rtype: List[str]
|
128 |
+
:returns: List of video URLs
|
|
|
129 |
"""
|
130 |
return [
|
131 |
self._video_url(video) for page in list(self._paginate()) for video in page
|
|
|
133 |
|
134 |
@property
|
135 |
def videos(self) -> Iterable[YouTube]:
|
136 |
+
"""Yields YouTube objects of videos in this playlist
|
137 |
+
|
138 |
:rtype: Iterable[YouTube]
|
139 |
"""
|
140 |
+
yield from (YouTube(url) for url in self.video_urls)
|
|
|
141 |
|
142 |
@deprecated(
|
143 |
"This call is unnecessary, you can directly access .video_urls or .videos"
|
144 |
)
|
145 |
def populate_video_urls(self) -> List[str]:
|
146 |
"""Complete links of all the videos in playlist
|
147 |
+
|
148 |
:rtype: List[str]
|
149 |
+
:returns: List of video URLs
|
|
|
150 |
"""
|
|
|
151 |
return self.video_urls
|
152 |
|
153 |
@deprecated("This function will be removed in the future.")
|
154 |
def _path_num_prefix_generator(self, reverse=False): # pragma: no cover
|
155 |
+
"""Generate number prefixes for the items in the playlist.
|
156 |
+
|
|
|
157 |
If the number of digits required to name a file is less than that
|
158 |
required to name the last file, it prepends 0s.
|
159 |
So if you have a playlist of 100 videos it will number them like:
|
|
|
178 |
reverse_numbering: bool = False,
|
179 |
resolution: str = "720p",
|
180 |
) -> None: # pragma: no cover
|
181 |
+
"""Download all the videos in the playlist.
|
|
|
|
|
182 |
|
183 |
:param download_path:
|
184 |
(optional) Output path for the playlist If one is not
|
|
|
197 |
Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
|
198 |
:type resolution: str
|
199 |
"""
|
|
|
200 |
logger.debug("total videos found: %d", len(self.video_urls))
|
201 |
logger.debug("starting download")
|
202 |
|
|
|
221 |
|
222 |
@cache
|
223 |
def title(self) -> Optional[str]:
|
224 |
+
"""Extract playlist title
|
225 |
+
|
226 |
+
:return: playlist title (name)
|
227 |
+
:rtype: Optional[str]
|
228 |
+
"""
|
229 |
+
pattern = re.compile("<title>(.+?)</title>")
|
230 |
match = pattern.search(self.html)
|
231 |
|
232 |
if match is None:
|
233 |
return None
|
234 |
|
235 |
+
return match.group(1).replace("- YouTube", "").strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
|
237 |
@staticmethod
|
238 |
def _video_url(watch_path: str):
|