bigbluedeer
committed on
Fix for the new youtube playlist requests (#950)
Browse files
* Fix for the new youtube playlist requests
* Implements pytube.request.post in order to fetch continuations
* Implements ytcfg extraction
* Slight refactor of object parser
* Extends _execute_request to take a data argument for post requests
* Adjusted tests to reflect changes
- pytube/contrib/playlist.py +48 -17
- pytube/extract.py +35 -0
- pytube/parser.py +51 -3
- pytube/request.py +32 -2
- tests/contrib/test_playlist.py +44 -34
pytube/contrib/playlist.py
CHANGED
@@ -30,12 +30,33 @@ class Playlist(Sequence):
|
|
30 |
if proxies:
|
31 |
install_proxy(proxies)
|
32 |
|
|
|
|
|
|
|
|
|
33 |
self.playlist_id = extract.playlist_id(url)
|
34 |
|
35 |
self.playlist_url = (
|
36 |
f"https://www.youtube.com/playlist?list={self.playlist_id}"
|
37 |
)
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
def _paginate(
|
41 |
self, until_watch_id: Optional[str] = None
|
@@ -49,7 +70,6 @@ class Playlist(Sequence):
|
|
49 |
:rtype: Iterable[List[str]]
|
50 |
:returns: Iterable of lists of YouTube watch ids
|
51 |
"""
|
52 |
-
req = self.html
|
53 |
videos_urls, continuation = self._extract_videos(
|
54 |
json.dumps(extract.initial_data(self.html))
|
55 |
)
|
@@ -67,15 +87,15 @@ class Playlist(Sequence):
|
|
67 |
# than 100 songs inside a playlist, so we need to add further requests
|
68 |
# to gather all of them
|
69 |
if continuation:
|
70 |
-
load_more_url, headers = self._build_continuation_url(continuation)
|
71 |
else:
|
72 |
-
load_more_url, headers = None, None
|
73 |
|
74 |
-
while load_more_url and headers: # there is an url found
|
75 |
logger.debug("load more url: %s", load_more_url)
|
76 |
# requesting the next page of videos with the url generated from the
|
77 |
-
# previous page
|
78 |
-
req = request.
|
79 |
# extract up to 100 songs from the page loaded
|
80 |
# returns another continuation if more videos are available
|
81 |
videos_urls, continuation = self._extract_videos(req)
|
@@ -89,32 +109,43 @@ class Playlist(Sequence):
|
|
89 |
yield videos_urls
|
90 |
|
91 |
if continuation:
|
92 |
-
load_more_url, headers = self._build_continuation_url(
|
93 |
continuation
|
94 |
)
|
95 |
else:
|
96 |
-
load_more_url, headers = None, None
|
97 |
|
98 |
-
|
99 |
-
def _build_continuation_url(continuation: str) -> Tuple[str, dict]:
|
100 |
"""Helper method to build the url and headers required to request
|
101 |
the next page of videos
|
102 |
|
103 |
:param str continuation: Continuation extracted from the json response
|
104 |
of the last page
|
105 |
-
:rtype: Tuple[str, dict]
|
106 |
:returns: Tuple of an url and required headers for the next http
|
107 |
request
|
108 |
"""
|
109 |
return (
|
110 |
(
|
111 |
-
|
112 |
-
|
|
|
|
|
113 |
),
|
114 |
{
|
115 |
"X-YouTube-Client-Name": "1",
|
116 |
"X-YouTube-Client-Version": "2.20200720.00.02",
|
117 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
)
|
119 |
|
120 |
@staticmethod
|
@@ -150,9 +181,9 @@ class Playlist(Sequence):
|
|
150 |
try:
|
151 |
# this is the json tree structure, if the json was directly sent
|
152 |
# by the server in a continuation response
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
videos = important_content
|
157 |
except (KeyError, IndexError, TypeError) as p:
|
158 |
print(p)
|
|
|
30 |
if proxies:
|
31 |
install_proxy(proxies)
|
32 |
|
33 |
+
# These need to be initialized as None for the properties.
|
34 |
+
self._html = None
|
35 |
+
self._ytcfg = None
|
36 |
+
|
37 |
self.playlist_id = extract.playlist_id(url)
|
38 |
|
39 |
self.playlist_url = (
|
40 |
f"https://www.youtube.com/playlist?list={self.playlist_id}"
|
41 |
)
|
42 |
+
|
43 |
+
@property
|
44 |
+
def html(self):
|
45 |
+
if self._html:
|
46 |
+
return self._html
|
47 |
+
self._html = request.get(self.playlist_url)
|
48 |
+
return self._html
|
49 |
+
|
50 |
+
@property
|
51 |
+
def ytcfg(self):
|
52 |
+
if self._ytcfg:
|
53 |
+
return self._ytcfg
|
54 |
+
self._ytcfg = extract.get_ytcfg(self.html)
|
55 |
+
return self._ytcfg
|
56 |
+
|
57 |
+
@property
|
58 |
+
def yt_api_key(self):
|
59 |
+
return self.ytcfg['INNERTUBE_API_KEY']
|
60 |
|
61 |
def _paginate(
|
62 |
self, until_watch_id: Optional[str] = None
|
|
|
70 |
:rtype: Iterable[List[str]]
|
71 |
:returns: Iterable of lists of YouTube watch ids
|
72 |
"""
|
|
|
73 |
videos_urls, continuation = self._extract_videos(
|
74 |
json.dumps(extract.initial_data(self.html))
|
75 |
)
|
|
|
87 |
# than 100 songs inside a playlist, so we need to add further requests
|
88 |
# to gather all of them
|
89 |
if continuation:
|
90 |
+
load_more_url, headers, data = self._build_continuation_url(continuation)
|
91 |
else:
|
92 |
+
load_more_url, headers, data = None, None, None
|
93 |
|
94 |
+
while load_more_url and headers and data: # there is an url found
|
95 |
logger.debug("load more url: %s", load_more_url)
|
96 |
# requesting the next page of videos with the url generated from the
|
97 |
+
# previous page, needs to be a post
|
98 |
+
req = request.post(load_more_url, extra_headers=headers, data=data)
|
99 |
# extract up to 100 songs from the page loaded
|
100 |
# returns another continuation if more videos are available
|
101 |
videos_urls, continuation = self._extract_videos(req)
|
|
|
109 |
yield videos_urls
|
110 |
|
111 |
if continuation:
|
112 |
+
load_more_url, headers, data = self._build_continuation_url(
|
113 |
continuation
|
114 |
)
|
115 |
else:
|
116 |
+
load_more_url, headers, data = None, None, None
|
117 |
|
118 |
+
def _build_continuation_url(self, continuation: str) -> Tuple[str, dict, dict]:
|
|
|
119 |
"""Helper method to build the url and headers required to request
|
120 |
the next page of videos
|
121 |
|
122 |
:param str continuation: Continuation extracted from the json response
|
123 |
of the last page
|
124 |
+
:rtype: Tuple[str, dict, dict]
|
125 |
:returns: Tuple of an url and required headers for the next http
|
126 |
request
|
127 |
"""
|
128 |
return (
|
129 |
(
|
130 |
+
# was changed to this format (and post requests)
|
131 |
+
# between 2021.03.02 and 2021.03.03
|
132 |
+
"https://www.youtube.com/youtubei/v1/browse?key="
|
133 |
+
f"{self.yt_api_key}"
|
134 |
),
|
135 |
{
|
136 |
"X-YouTube-Client-Name": "1",
|
137 |
"X-YouTube-Client-Version": "2.20200720.00.02",
|
138 |
},
|
139 |
+
# extra data required for post request
|
140 |
+
{
|
141 |
+
"continuation": continuation,
|
142 |
+
"context": {
|
143 |
+
"client": {
|
144 |
+
"clientName": "WEB",
|
145 |
+
"clientVersion": "2.20200720.00.02"
|
146 |
+
}
|
147 |
+
}
|
148 |
+
}
|
149 |
)
|
150 |
|
151 |
@staticmethod
|
|
|
181 |
try:
|
182 |
# this is the json tree structure, if the json was directly sent
|
183 |
# by the server in a continuation response
|
184 |
+
# no longer a list and no longer has the "response" key
|
185 |
+
important_content = initial_data['onResponseReceivedActions'][0][
|
186 |
+
'appendContinuationItemsAction']['continuationItems']
|
187 |
videos = important_content
|
188 |
except (KeyError, IndexError, TypeError) as p:
|
189 |
print(p)
|
pytube/extract.py
CHANGED
@@ -24,6 +24,7 @@ from pytube.exceptions import RegexMatchError
|
|
24 |
from pytube.helpers import regex_search
|
25 |
from pytube.metadata import YouTubeMetadata
|
26 |
from pytube.parser import parse_for_object
|
|
|
27 |
|
28 |
logger = logging.getLogger(__name__)
|
29 |
|
@@ -354,6 +355,40 @@ def get_ytplayer_config(html: str) -> Any:
|
|
354 |
)
|
355 |
|
356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
|
358 |
"""Apply the decrypted signature to the stream manifest.
|
359 |
|
|
|
24 |
from pytube.helpers import regex_search
|
25 |
from pytube.metadata import YouTubeMetadata
|
26 |
from pytube.parser import parse_for_object
|
27 |
+
from pytube.parser import parse_for_all_objects
|
28 |
|
29 |
logger = logging.getLogger(__name__)
|
30 |
|
|
|
355 |
)
|
356 |
|
357 |
|
358 |
+
def get_ytcfg(html: str) -> str:
|
359 |
+
"""Get the entirety of the ytcfg object.
|
360 |
+
|
361 |
+
This is built over multiple pieces, so we have to find all matches and
|
362 |
+
combine the dicts together.
|
363 |
+
|
364 |
+
:param str html:
|
365 |
+
The html contents of the watch page.
|
366 |
+
:rtype: dict
|
367 |
+
:returns:
|
368 |
+
Dict built by merging every ytcfg object found in the html.
|
369 |
+
"""
|
370 |
+
ytcfg = {}
|
371 |
+
ytcfg_patterns = [
|
372 |
+
r"ytcfg\s=\s",
|
373 |
+
r"ytcfg\.set\("
|
374 |
+
]
|
375 |
+
for pattern in ytcfg_patterns:
|
376 |
+
# Try each pattern consecutively and try to build a cohesive object
|
377 |
+
try:
|
378 |
+
found_objects = parse_for_all_objects(html, pattern)
|
379 |
+
for obj in found_objects:
|
380 |
+
ytcfg.update(obj)
|
381 |
+
except HTMLParseError:
|
382 |
+
continue
|
383 |
+
|
384 |
+
if len(ytcfg) > 0:
|
385 |
+
return ytcfg
|
386 |
+
|
387 |
+
raise RegexMatchError(
|
388 |
+
caller="get_ytcfg", pattern="ytcfg_pattenrs"
|
389 |
+
)
|
390 |
+
|
391 |
+
|
392 |
def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
|
393 |
"""Apply the decrypted signature to the stream manifest.
|
394 |
|
pytube/parser.py
CHANGED
@@ -4,6 +4,39 @@ import re
|
|
4 |
from pytube.exceptions import HTMLParseError
|
5 |
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
def parse_for_object(html, preceding_regex):
|
8 |
"""Parses input html to find the end of a JavaScript object.
|
9 |
|
@@ -20,11 +53,11 @@ def parse_for_object(html, preceding_regex):
|
|
20 |
if not result:
|
21 |
raise HTMLParseError(f'No matches for regex {preceding_regex}')
|
22 |
|
23 |
-
start_index = result.
|
24 |
return parse_for_object_from_startpoint(html, start_index)
|
25 |
|
26 |
|
27 |
-
def
|
28 |
"""Parses input html to find the end of a JavaScript object.
|
29 |
|
30 |
:param str html:
|
@@ -77,10 +110,25 @@ def parse_for_object_from_startpoint(html, start_point):
|
|
77 |
i += 1
|
78 |
|
79 |
full_obj = html[:i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
try:
|
81 |
return json.loads(full_obj)
|
82 |
except json.decoder.JSONDecodeError:
|
83 |
try:
|
84 |
return ast.literal_eval(full_obj)
|
85 |
-
except ValueError:
|
86 |
raise HTMLParseError('Could not parse object.')
|
|
|
4 |
from pytube.exceptions import HTMLParseError
|
5 |
|
6 |
|
7 |
+
def parse_for_all_objects(html, preceding_regex):
|
8 |
+
"""Parses input html to find all matches for the input starting point.
|
9 |
+
|
10 |
+
:param str html:
|
11 |
+
HTML to be parsed for an object.
|
12 |
+
:param str preceding_regex:
|
13 |
+
Regex to find the string preceding the object.
|
14 |
+
:rtype list:
|
15 |
+
:returns:
|
16 |
+
A list of dicts created from parsing the objects.
|
17 |
+
"""
|
18 |
+
result = []
|
19 |
+
regex = re.compile(preceding_regex)
|
20 |
+
match_iter = regex.finditer(html)
|
21 |
+
for match in match_iter:
|
22 |
+
if match:
|
23 |
+
start_index = match.end()
|
24 |
+
try:
|
25 |
+
obj = parse_for_object_from_startpoint(html, start_index)
|
26 |
+
except HTMLParseError:
|
27 |
+
# Some of the instances might fail because set is technically
|
28 |
+
# a method of the ytcfg object. We'll skip these since they
|
29 |
+
# don't seem relevant at the moment.
|
30 |
+
continue
|
31 |
+
else:
|
32 |
+
result.append(obj)
|
33 |
+
|
34 |
+
if len(result) == 0:
|
35 |
+
raise HTMLParseError(f'No matches for regex {preceding_regex}')
|
36 |
+
|
37 |
+
return result
|
38 |
+
|
39 |
+
|
40 |
def parse_for_object(html, preceding_regex):
|
41 |
"""Parses input html to find the end of a JavaScript object.
|
42 |
|
|
|
53 |
if not result:
|
54 |
raise HTMLParseError(f'No matches for regex {preceding_regex}')
|
55 |
|
56 |
+
start_index = result.end()
|
57 |
return parse_for_object_from_startpoint(html, start_index)
|
58 |
|
59 |
|
60 |
+
def find_object_from_startpoint(html, start_point):
|
61 |
"""Parses input html to find the end of a JavaScript object.
|
62 |
|
63 |
:param str html:
|
|
|
110 |
i += 1
|
111 |
|
112 |
full_obj = html[:i]
|
113 |
+
return full_obj # noqa: R504
|
114 |
+
|
115 |
+
|
116 |
+
def parse_for_object_from_startpoint(html, start_point):
|
117 |
+
"""JSONifies an object parsed from HTML.
|
118 |
+
|
119 |
+
:param str html:
|
120 |
+
HTML to be parsed for an object.
|
121 |
+
:param int start_point:
|
122 |
+
Index of where the object starts.
|
123 |
+
:rtype dict:
|
124 |
+
:returns:
|
125 |
+
A dict created from parsing the object.
|
126 |
+
"""
|
127 |
+
full_obj = find_object_from_startpoint(html, start_point)
|
128 |
try:
|
129 |
return json.loads(full_obj)
|
130 |
except json.decoder.JSONDecodeError:
|
131 |
try:
|
132 |
return ast.literal_eval(full_obj)
|
133 |
+
except (ValueError, SyntaxError):
|
134 |
raise HTMLParseError('Could not parse object.')
|
pytube/request.py
CHANGED
@@ -3,6 +3,7 @@
|
|
3 |
import logging
|
4 |
from functools import lru_cache
|
5 |
import re
|
|
|
6 |
from urllib import parse
|
7 |
from urllib.request import Request
|
8 |
from urllib.request import urlopen
|
@@ -15,12 +16,16 @@ default_chunk_size = 4096 # 4kb
|
|
15 |
default_range_size = 9437184 # 9MB
|
16 |
|
17 |
|
18 |
-
def _execute_request(url, method=None, headers=None):
|
19 |
base_headers = {"User-Agent": "Mozilla/5.0", "accept-language": "en-US,en"}
|
20 |
if headers:
|
21 |
base_headers.update(headers)
|
|
|
|
|
|
|
|
|
22 |
if url.lower().startswith("http"):
|
23 |
-
request = Request(url, headers=base_headers, method=method)
|
24 |
else:
|
25 |
raise ValueError("Invalid URL")
|
26 |
return urlopen(request) # nosec
|
@@ -42,6 +47,31 @@ def get(url, extra_headers=None):
|
|
42 |
return _execute_request(url, headers=extra_headers).read().decode("utf-8")
|
43 |
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
def seq_stream(url, chunk_size=default_chunk_size, range_size=default_range_size):
|
46 |
"""Read the response in sequence.
|
47 |
:param str url: The URL to perform the GET request for.
|
|
|
3 |
import logging
|
4 |
from functools import lru_cache
|
5 |
import re
|
6 |
+
import json
|
7 |
from urllib import parse
|
8 |
from urllib.request import Request
|
9 |
from urllib.request import urlopen
|
|
|
16 |
default_range_size = 9437184 # 9MB
|
17 |
|
18 |
|
19 |
+
def _execute_request(url, method=None, headers=None, data=None):
|
20 |
base_headers = {"User-Agent": "Mozilla/5.0", "accept-language": "en-US,en"}
|
21 |
if headers:
|
22 |
base_headers.update(headers)
|
23 |
+
if data:
|
24 |
+
# encode data for request
|
25 |
+
if not isinstance(data, bytes):
|
26 |
+
data = bytes(json.dumps(data), encoding="utf-8")
|
27 |
if url.lower().startswith("http"):
|
28 |
+
request = Request(url, headers=base_headers, method=method, data=data)
|
29 |
else:
|
30 |
raise ValueError("Invalid URL")
|
31 |
return urlopen(request) # nosec
|
|
|
47 |
return _execute_request(url, headers=extra_headers).read().decode("utf-8")
|
48 |
|
49 |
|
50 |
+
def post(url, extra_headers=None, data=None):
|
51 |
+
"""Send an http POST request.
|
52 |
+
|
53 |
+
:param str url:
|
54 |
+
The URL to perform the POST request for.
|
55 |
+
:param dict extra_headers:
|
56 |
+
Extra headers to add to the request
|
57 |
+
:param dict data:
|
58 |
+
The data to send on the POST request
|
59 |
+
:rtype: str
|
60 |
+
:returns:
|
61 |
+
UTF-8 encoded string of response
|
62 |
+
"""
|
63 |
+
# could technically be implemented in get,
|
64 |
+
# but to avoid confusion implemented like this
|
65 |
+
if extra_headers is None:
|
66 |
+
extra_headers = {}
|
67 |
+
if data is None:
|
68 |
+
data = {}
|
69 |
+
# required because the youtube servers are strict on content type
|
70 |
+
# raises HTTPError [400]: Bad Request otherwise
|
71 |
+
extra_headers.update({"Content-Type": "application/json"})
|
72 |
+
return _execute_request(url, headers=extra_headers, data=data).read().decode("utf-8")
|
73 |
+
|
74 |
+
|
75 |
def seq_stream(url, chunk_size=default_chunk_size, range_size=default_range_size):
|
76 |
"""Read the response in sequence.
|
77 |
:param str url: The URL to perform the GET request for.
|
tests/contrib/test_playlist.py
CHANGED
@@ -1,12 +1,11 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import datetime
|
3 |
from unittest import mock
|
4 |
-
from unittest.mock import MagicMock
|
5 |
|
6 |
from pytube import Playlist
|
7 |
|
8 |
|
9 |
-
@mock.patch("pytube.
|
10 |
def test_title(request_get):
|
11 |
request_get.return_value = (
|
12 |
"<title>(149) Python Tutorial for Beginners "
|
@@ -24,7 +23,7 @@ def test_title(request_get):
|
|
24 |
)
|
25 |
|
26 |
|
27 |
-
@mock.patch("pytube.
|
28 |
def test_init_with_playlist_url(request_get):
|
29 |
request_get.return_value = ""
|
30 |
url = (
|
@@ -35,7 +34,7 @@ def test_init_with_playlist_url(request_get):
|
|
35 |
assert playlist.playlist_url == url
|
36 |
|
37 |
|
38 |
-
@mock.patch("pytube.
|
39 |
def test_init_with_watch_url(request_get):
|
40 |
request_get.return_value = ""
|
41 |
url = (
|
@@ -49,7 +48,7 @@ def test_init_with_watch_url(request_get):
|
|
49 |
)
|
50 |
|
51 |
|
52 |
-
@mock.patch("pytube.
|
53 |
def test_last_updated(request_get, playlist_html):
|
54 |
expected = datetime.date(2020, 3, 11)
|
55 |
request_get.return_value = playlist_html
|
@@ -60,13 +59,11 @@ def test_last_updated(request_get, playlist_html):
|
|
60 |
assert playlist.last_updated == expected
|
61 |
|
62 |
|
63 |
-
@mock.patch("pytube.
|
64 |
def test_video_urls(request_get, playlist_html):
|
65 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
66 |
request_get.return_value = playlist_html
|
67 |
playlist = Playlist(url)
|
68 |
-
playlist._find_load_more_url = MagicMock(return_value=None)
|
69 |
-
request_get.assert_called()
|
70 |
assert playlist.video_urls == [
|
71 |
"https://www.youtube.com/watch?v=ujTCoH21GlA",
|
72 |
"https://www.youtube.com/watch?v=45ryDIPHdGg",
|
@@ -81,15 +78,23 @@ def test_video_urls(request_get, playlist_html):
|
|
81 |
"https://www.youtube.com/watch?v=g1Zbuk1gAfk",
|
82 |
"https://www.youtube.com/watch?v=zixd-si9Q-o",
|
83 |
]
|
|
|
84 |
|
85 |
|
86 |
-
@mock.patch("pytube.
|
87 |
-
def
|
88 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
89 |
request_get.return_value = playlist_html
|
90 |
playlist = Playlist(url)
|
91 |
-
playlist.
|
92 |
request_get.assert_called()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
assert (
|
94 |
repr(playlist) == "["
|
95 |
"'https://www.youtube.com/watch?v=ujTCoH21GlA', "
|
@@ -106,30 +111,29 @@ def test_repr(request_get, playlist_html):
|
|
106 |
"'https://www.youtube.com/watch?v=zixd-si9Q-o'"
|
107 |
"]"
|
108 |
)
|
|
|
109 |
|
110 |
|
111 |
-
@mock.patch("pytube.
|
112 |
def test_sequence(request_get, playlist_html):
|
113 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
114 |
request_get.return_value = playlist_html
|
115 |
playlist = Playlist(url)
|
116 |
-
playlist._find_load_more_url = MagicMock(return_value=None)
|
117 |
assert playlist[0] == "https://www.youtube.com/watch?v=ujTCoH21GlA"
|
118 |
assert len(playlist) == 12
|
119 |
|
120 |
|
121 |
-
@mock.patch("pytube.
|
122 |
@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
|
123 |
def test_videos(youtube, request_get, playlist_html):
|
124 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
125 |
request_get.return_value = playlist_html
|
126 |
playlist = Playlist(url)
|
127 |
-
playlist._find_load_more_url = MagicMock(return_value=None)
|
128 |
-
request_get.assert_called()
|
129 |
assert len(list(playlist.videos)) == 12
|
|
|
130 |
|
131 |
|
132 |
-
@mock.patch("pytube.
|
133 |
@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
|
134 |
def test_load_more(youtube, request_get, playlist_html):
|
135 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
@@ -138,12 +142,11 @@ def test_load_more(youtube, request_get, playlist_html):
|
|
138 |
'{"content_html":"", "load_more_widget_html":""}',
|
139 |
]
|
140 |
playlist = Playlist(url)
|
141 |
-
playlist._find_load_more_url = MagicMock(side_effect=["dummy", None])
|
142 |
-
request_get.assert_called()
|
143 |
assert len(list(playlist.videos)) == 12
|
|
|
144 |
|
145 |
|
146 |
-
@mock.patch("pytube.
|
147 |
@mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
|
148 |
def test_proxy(install_proxy, request_get):
|
149 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
@@ -152,31 +155,34 @@ def test_proxy(install_proxy, request_get):
|
|
152 |
install_proxy.assert_called_with({"http": "things"})
|
153 |
|
154 |
|
155 |
-
@mock.patch("pytube.
|
156 |
def test_trimmed(request_get, playlist_html):
|
157 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
158 |
request_get.return_value = playlist_html
|
159 |
playlist = Playlist(url)
|
160 |
-
playlist._find_load_more_url = MagicMock(return_value=None)
|
161 |
-
assert request_get.call_count == 1
|
162 |
trimmed = list(playlist.trimmed("1BYu65vLKdA"))
|
163 |
assert trimmed == [
|
164 |
"https://www.youtube.com/watch?v=ujTCoH21GlA",
|
165 |
"https://www.youtube.com/watch?v=45ryDIPHdGg",
|
166 |
]
|
|
|
167 |
|
168 |
|
169 |
-
@mock.patch("pytube.
|
170 |
-
|
|
|
171 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
172 |
request_get.side_effect = [
|
173 |
playlist_long_html,
|
174 |
-
|
|
|
|
|
175 |
]
|
176 |
playlist = Playlist(url)
|
177 |
video_urls = playlist.video_urls
|
178 |
assert len(video_urls) == 100
|
179 |
-
assert request_get.call_count ==
|
|
|
180 |
# TODO: Cannot get this test to work probably
|
181 |
# request_get.assert_called_with(
|
182 |
# "https://www.youtube.com/browse_ajax?ctoken" # noqa
|
@@ -189,11 +195,14 @@ def test_playlist_failed_pagination(request_get, playlist_long_html):
|
|
189 |
# ) # noqa
|
190 |
|
191 |
|
192 |
-
@mock.patch("pytube.
|
193 |
-
|
|
|
194 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
195 |
request_get.side_effect = [
|
196 |
-
playlist_long_html
|
|
|
|
|
197 |
'{"content_html":"<a '
|
198 |
'href=\\"/watch?v=BcWz41-4cDk&feature=plpp_video&ved'
|
199 |
'=CCYQxjQYACITCO33n5-pn-cCFUG3xAodLogN2yj6LA\\">}", '
|
@@ -202,10 +211,11 @@ def test_playlist_pagination(request_get, playlist_html, playlist_long_html):
|
|
202 |
]
|
203 |
playlist = Playlist(url)
|
204 |
assert len(playlist.video_urls) == 100
|
205 |
-
assert request_get.call_count ==
|
|
|
206 |
|
207 |
|
208 |
-
@mock.patch("pytube.
|
209 |
def test_trimmed_pagination(request_get, playlist_html, playlist_long_html):
|
210 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
211 |
request_get.side_effect = [
|
@@ -222,7 +232,7 @@ def test_trimmed_pagination(request_get, playlist_html, playlist_long_html):
|
|
222 |
|
223 |
|
224 |
# TODO: Test case not clear to me
|
225 |
-
@mock.patch("pytube.
|
226 |
def test_trimmed_pagination_not_found(
|
227 |
request_get, playlist_html, playlist_long_html
|
228 |
):
|
@@ -241,7 +251,7 @@ def test_trimmed_pagination_not_found(
|
|
241 |
|
242 |
|
243 |
# test case for playlist with submenus
|
244 |
-
@mock.patch("pytube.
|
245 |
def test_playlist_submenu(
|
246 |
request_get, playlist_submenu_html):
|
247 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import datetime
|
3 |
from unittest import mock
|
|
|
4 |
|
5 |
from pytube import Playlist
|
6 |
|
7 |
|
8 |
+
@mock.patch("pytube.request.get")
|
9 |
def test_title(request_get):
|
10 |
request_get.return_value = (
|
11 |
"<title>(149) Python Tutorial for Beginners "
|
|
|
23 |
)
|
24 |
|
25 |
|
26 |
+
@mock.patch("pytube.request.get")
|
27 |
def test_init_with_playlist_url(request_get):
|
28 |
request_get.return_value = ""
|
29 |
url = (
|
|
|
34 |
assert playlist.playlist_url == url
|
35 |
|
36 |
|
37 |
+
@mock.patch("pytube.request.get")
|
38 |
def test_init_with_watch_url(request_get):
|
39 |
request_get.return_value = ""
|
40 |
url = (
|
|
|
48 |
)
|
49 |
|
50 |
|
51 |
+
@mock.patch("pytube.request.get")
|
52 |
def test_last_updated(request_get, playlist_html):
|
53 |
expected = datetime.date(2020, 3, 11)
|
54 |
request_get.return_value = playlist_html
|
|
|
59 |
assert playlist.last_updated == expected
|
60 |
|
61 |
|
62 |
+
@mock.patch("pytube.request.get")
|
63 |
def test_video_urls(request_get, playlist_html):
|
64 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
65 |
request_get.return_value = playlist_html
|
66 |
playlist = Playlist(url)
|
|
|
|
|
67 |
assert playlist.video_urls == [
|
68 |
"https://www.youtube.com/watch?v=ujTCoH21GlA",
|
69 |
"https://www.youtube.com/watch?v=45ryDIPHdGg",
|
|
|
78 |
"https://www.youtube.com/watch?v=g1Zbuk1gAfk",
|
79 |
"https://www.youtube.com/watch?v=zixd-si9Q-o",
|
80 |
]
|
81 |
+
request_get.assert_called()
|
82 |
|
83 |
|
84 |
+
@mock.patch("pytube.request.get")
|
85 |
+
def test_html(request_get, playlist_html):
|
86 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
87 |
request_get.return_value = playlist_html
|
88 |
playlist = Playlist(url)
|
89 |
+
assert playlist.html == playlist_html
|
90 |
request_get.assert_called()
|
91 |
+
|
92 |
+
|
93 |
+
@mock.patch("pytube.request.get")
|
94 |
+
def test_repr(request_get, playlist_html):
|
95 |
+
url = "https://www.fakeurl.com/playlist?list=whatever"
|
96 |
+
request_get.return_value = playlist_html
|
97 |
+
playlist = Playlist(url)
|
98 |
assert (
|
99 |
repr(playlist) == "["
|
100 |
"'https://www.youtube.com/watch?v=ujTCoH21GlA', "
|
|
|
111 |
"'https://www.youtube.com/watch?v=zixd-si9Q-o'"
|
112 |
"]"
|
113 |
)
|
114 |
+
request_get.assert_called()
|
115 |
|
116 |
|
117 |
+
@mock.patch("pytube.request.get")
|
118 |
def test_sequence(request_get, playlist_html):
|
119 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
120 |
request_get.return_value = playlist_html
|
121 |
playlist = Playlist(url)
|
|
|
122 |
assert playlist[0] == "https://www.youtube.com/watch?v=ujTCoH21GlA"
|
123 |
assert len(playlist) == 12
|
124 |
|
125 |
|
126 |
+
@mock.patch("pytube.request.get")
|
127 |
@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
|
128 |
def test_videos(youtube, request_get, playlist_html):
|
129 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
130 |
request_get.return_value = playlist_html
|
131 |
playlist = Playlist(url)
|
|
|
|
|
132 |
assert len(list(playlist.videos)) == 12
|
133 |
+
request_get.assert_called()
|
134 |
|
135 |
|
136 |
+
@mock.patch("pytube.request.get")
|
137 |
@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
|
138 |
def test_load_more(youtube, request_get, playlist_html):
|
139 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
|
|
142 |
'{"content_html":"", "load_more_widget_html":""}',
|
143 |
]
|
144 |
playlist = Playlist(url)
|
|
|
|
|
145 |
assert len(list(playlist.videos)) == 12
|
146 |
+
request_get.assert_called()
|
147 |
|
148 |
|
149 |
+
@mock.patch("pytube.request.get")
|
150 |
@mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
|
151 |
def test_proxy(install_proxy, request_get):
|
152 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
|
|
155 |
install_proxy.assert_called_with({"http": "things"})
|
156 |
|
157 |
|
158 |
+
@mock.patch("pytube.request.get")
|
159 |
def test_trimmed(request_get, playlist_html):
|
160 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
161 |
request_get.return_value = playlist_html
|
162 |
playlist = Playlist(url)
|
|
|
|
|
163 |
trimmed = list(playlist.trimmed("1BYu65vLKdA"))
|
164 |
assert trimmed == [
|
165 |
"https://www.youtube.com/watch?v=ujTCoH21GlA",
|
166 |
"https://www.youtube.com/watch?v=45ryDIPHdGg",
|
167 |
]
|
168 |
+
assert request_get.call_count == 1
|
169 |
|
170 |
|
171 |
+
@mock.patch("pytube.request.get")
|
172 |
+
@mock.patch("pytube.request.post")
|
173 |
+
def test_playlist_failed_pagination(request_post, request_get, playlist_long_html):
|
174 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
175 |
request_get.side_effect = [
|
176 |
playlist_long_html,
|
177 |
+
]
|
178 |
+
request_post.side_effect = [
|
179 |
+
"{}"
|
180 |
]
|
181 |
playlist = Playlist(url)
|
182 |
video_urls = playlist.video_urls
|
183 |
assert len(video_urls) == 100
|
184 |
+
assert request_get.call_count == 1
|
185 |
+
assert request_post.call_count == 1
|
186 |
# TODO: Cannot get this test to work probably
|
187 |
# request_get.assert_called_with(
|
188 |
# "https://www.youtube.com/browse_ajax?ctoken" # noqa
|
|
|
195 |
# ) # noqa
|
196 |
|
197 |
|
198 |
+
@mock.patch("pytube.request.get")
|
199 |
+
@mock.patch("pytube.request.post")
|
200 |
+
def test_playlist_pagination(request_post, request_get, playlist_html, playlist_long_html):
|
201 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
202 |
request_get.side_effect = [
|
203 |
+
playlist_long_html
|
204 |
+
]
|
205 |
+
request_post.side_effect = [
|
206 |
'{"content_html":"<a '
|
207 |
'href=\\"/watch?v=BcWz41-4cDk&feature=plpp_video&ved'
|
208 |
'=CCYQxjQYACITCO33n5-pn-cCFUG3xAodLogN2yj6LA\\">}", '
|
|
|
211 |
]
|
212 |
playlist = Playlist(url)
|
213 |
assert len(playlist.video_urls) == 100
|
214 |
+
assert request_get.call_count == 1
|
215 |
+
assert request_post.call_count == 1
|
216 |
|
217 |
|
218 |
+
@mock.patch("pytube.request.get")
|
219 |
def test_trimmed_pagination(request_get, playlist_html, playlist_long_html):
|
220 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
221 |
request_get.side_effect = [
|
|
|
232 |
|
233 |
|
234 |
# TODO: Test case not clear to me
|
235 |
+
@mock.patch("pytube.request.get")
|
236 |
def test_trimmed_pagination_not_found(
|
237 |
request_get, playlist_html, playlist_long_html
|
238 |
):
|
|
|
251 |
|
252 |
|
253 |
# test case for playlist with submenus
|
254 |
+
@mock.patch("pytube.request.get")
|
255 |
def test_playlist_submenu(
|
256 |
request_get, playlist_submenu_html):
|
257 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|