Merge pull request #31 from hbmartin/playlist-addons
Browse files- pytube/contrib/playlist.py +27 -12
- tests/contrib/test_playlist.py +25 -1
pytube/contrib/playlist.py
CHANGED
@@ -5,10 +5,11 @@ import json
|
|
5 |
import logging
|
6 |
import re
|
7 |
from collections import OrderedDict
|
|
|
8 |
from typing import List, Optional, Iterable, Dict
|
9 |
from urllib.parse import parse_qs
|
10 |
|
11 |
-
from pytube import request, YouTube
|
12 |
from pytube.helpers import cache, deprecated
|
13 |
from pytube.mixins import install_proxy
|
14 |
|
@@ -34,6 +35,17 @@ class Playlist:
|
|
34 |
)
|
35 |
self.html = request.get(self.playlist_url)
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
@staticmethod
|
38 |
def _find_load_more_url(req: str) -> Optional[str]:
|
39 |
"""Given an html page or a fragment thereof, looks for
|
@@ -48,11 +60,10 @@ class Playlist:
|
|
48 |
|
49 |
return None
|
50 |
|
51 |
-
def parse_links(self) -> List[str]:
|
52 |
"""Parse the video links from the page source, extracts and
|
53 |
returns the /watch?v= part from video link href
|
54 |
"""
|
55 |
-
|
56 |
req = self.html
|
57 |
|
58 |
# split the page source by line and process each line
|
@@ -63,6 +74,12 @@ class Playlist:
|
|
63 |
# Simulating a browser request for the load more link
|
64 |
load_more_url = self._find_load_more_url(req)
|
65 |
while load_more_url: # there is an url found
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
logger.debug("load more url: %s", load_more_url)
|
67 |
req = request.get(load_more_url)
|
68 |
load_more = json.loads(req)
|
@@ -86,12 +103,8 @@ class Playlist:
|
|
86 |
:returns:
|
87 |
List of video URLs from the playlist trimmed at the given ID
|
88 |
"""
|
89 |
-
|
90 |
-
for
|
91 |
-
if extract.video_id(url) == video_id:
|
92 |
-
break
|
93 |
-
trimmed_urls.append(url)
|
94 |
-
return trimmed_urls
|
95 |
|
96 |
@property # type: ignore
|
97 |
@cache
|
@@ -101,9 +114,7 @@ class Playlist:
|
|
101 |
:returns:
|
102 |
List of video URLs
|
103 |
"""
|
104 |
-
return [
|
105 |
-
"https://www.youtube.com" + watch_path for watch_path in self.parse_links()
|
106 |
-
]
|
107 |
|
108 |
@property
|
109 |
def videos(self) -> Iterable[YouTube]:
|
@@ -213,3 +224,7 @@ class Playlist:
|
|
213 |
.replace("- YouTube", "")
|
214 |
.strip()
|
215 |
)
|
|
|
|
|
|
|
|
|
|
5 |
import logging
|
6 |
import re
|
7 |
from collections import OrderedDict
|
8 |
+
from datetime import date, datetime
|
9 |
from typing import List, Optional, Iterable, Dict
|
10 |
from urllib.parse import parse_qs
|
11 |
|
12 |
+
from pytube import request, YouTube
|
13 |
from pytube.helpers import cache, deprecated
|
14 |
from pytube.mixins import install_proxy
|
15 |
|
|
|
35 |
)
|
36 |
self.html = request.get(self.playlist_url)
|
37 |
|
38 |
+
# Needs testing with non-English
|
39 |
+
self.last_update: Optional[date] = None
|
40 |
+
results = re.search(
|
41 |
+
r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})<\/li>", self.html
|
42 |
+
)
|
43 |
+
if results:
|
44 |
+
month, day, year = results.groups()
|
45 |
+
self.last_update = datetime.strptime(
|
46 |
+
f"{month} {day:0>2} {year}", "%b %d %Y"
|
47 |
+
).date()
|
48 |
+
|
49 |
@staticmethod
|
50 |
def _find_load_more_url(req: str) -> Optional[str]:
|
51 |
"""Given an html page or a fragment thereof, looks for
|
|
|
60 |
|
61 |
return None
|
62 |
|
63 |
+
def parse_links(self, until_watch_id: Optional[str] = None) -> List[str]:
|
64 |
"""Parse the video links from the page source, extracts and
|
65 |
returns the /watch?v= part from video link href
|
66 |
"""
|
|
|
67 |
req = self.html
|
68 |
|
69 |
# split the page source by line and process each line
|
|
|
74 |
# Simulating a browser request for the load more link
|
75 |
load_more_url = self._find_load_more_url(req)
|
76 |
while load_more_url: # there is an url found
|
77 |
+
if until_watch_id:
|
78 |
+
try:
|
79 |
+
trim_index = link_list.index(f"/watch?v={until_watch_id}")
|
80 |
+
return link_list[:trim_index]
|
81 |
+
except ValueError:
|
82 |
+
pass
|
83 |
logger.debug("load more url: %s", load_more_url)
|
84 |
req = request.get(load_more_url)
|
85 |
load_more = json.loads(req)
|
|
|
103 |
:returns:
|
104 |
List of video URLs from the playlist trimmed at the given ID
|
105 |
"""
|
106 |
+
trimmed_watch = self.parse_links(until_watch_id=video_id)
|
107 |
+
return [self._video_url(watch_path) for watch_path in trimmed_watch]
|
|
|
|
|
|
|
|
|
108 |
|
109 |
@property # type: ignore
|
110 |
@cache
|
|
|
114 |
:returns:
|
115 |
List of video URLs
|
116 |
"""
|
117 |
+
return [self._video_url(watch_path) for watch_path in self.parse_links()]
|
|
|
|
|
118 |
|
119 |
@property
|
120 |
def videos(self) -> Iterable[YouTube]:
|
|
|
224 |
.replace("- YouTube", "")
|
225 |
.strip()
|
226 |
)
|
227 |
+
|
228 |
+
@staticmethod
|
229 |
+
def _video_url(watch_path: str):
|
230 |
+
return f"https://www.youtube.com{watch_path}"
|
tests/contrib/test_playlist.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
|
|
2 |
from unittest import mock
|
3 |
from unittest.mock import MagicMock
|
4 |
|
@@ -39,6 +40,14 @@ def test_init_with_watch_url(request_get):
|
|
39 |
)
|
40 |
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
@mock.patch("pytube.contrib.playlist.request.get")
|
43 |
def test_init_with_watch_id(request_get):
|
44 |
request_get.return_value = ""
|
@@ -107,6 +116,20 @@ def test_videos(youtube, request_get, playlist_html):
|
|
107 |
assert len(list(playlist.videos)) == 12
|
108 |
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
@mock.patch("pytube.contrib.playlist.request.get")
|
111 |
@mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
|
112 |
def test_proxy(install_proxy, request_get):
|
@@ -121,7 +144,8 @@ def test_trimmed(request_get, playlist_html):
|
|
121 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
122 |
request_get.return_value = playlist_html
|
123 |
playlist = Playlist(url)
|
124 |
-
playlist._find_load_more_url = MagicMock(return_value=
|
|
|
125 |
assert playlist.trimmed("1BYu65vLKdA") == [
|
126 |
"https://www.youtube.com/watch?v=ujTCoH21GlA",
|
127 |
"https://www.youtube.com/watch?v=45ryDIPHdGg",
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
import datetime
|
3 |
from unittest import mock
|
4 |
from unittest.mock import MagicMock
|
5 |
|
|
|
40 |
)
|
41 |
|
42 |
|
43 |
+
@mock.patch("pytube.contrib.playlist.request.get")
|
44 |
+
def test_last_update(request_get, playlist_html):
|
45 |
+
expected = datetime.date(2019, 3, 7)
|
46 |
+
request_get.return_value = playlist_html
|
47 |
+
playlist = Playlist("url")
|
48 |
+
assert playlist.last_update == expected
|
49 |
+
|
50 |
+
|
51 |
@mock.patch("pytube.contrib.playlist.request.get")
|
52 |
def test_init_with_watch_id(request_get):
|
53 |
request_get.return_value = ""
|
|
|
116 |
assert len(list(playlist.videos)) == 12
|
117 |
|
118 |
|
119 |
+
@mock.patch("pytube.contrib.playlist.request.get")
|
120 |
+
@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
|
121 |
+
def test_load_more(youtube, request_get, playlist_html):
|
122 |
+
url = "https://www.fakeurl.com/playlist?list=whatever"
|
123 |
+
request_get.side_effect = [
|
124 |
+
playlist_html,
|
125 |
+
'{"content_html":"", "load_more_widget_html":""}',
|
126 |
+
]
|
127 |
+
playlist = Playlist(url)
|
128 |
+
playlist._find_load_more_url = MagicMock(side_effect=["dummy", None])
|
129 |
+
request_get.assert_called()
|
130 |
+
assert len(list(playlist.videos)) == 12
|
131 |
+
|
132 |
+
|
133 |
@mock.patch("pytube.contrib.playlist.request.get")
|
134 |
@mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
|
135 |
def test_proxy(install_proxy, request_get):
|
|
|
144 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
145 |
request_get.return_value = playlist_html
|
146 |
playlist = Playlist(url)
|
147 |
+
playlist._find_load_more_url = MagicMock(return_value="dummy")
|
148 |
+
assert request_get.call_count == 1
|
149 |
assert playlist.trimmed("1BYu65vLKdA") == [
|
150 |
"https://www.youtube.com/watch?v=ujTCoH21GlA",
|
151 |
"https://www.youtube.com/watch?v=45ryDIPHdGg",
|