Better handling for age-restricted videos
Browse files
- pytube/__main__.py +10 -9
- pytube/extract.py +4 -1
- tests/conftest.py +1 -1
- tests/generate_fixture.py +1 -0
- tests/mocks/yt-video-irauhITDrsE.json.gz +0 -0
- tests/mocks/yt-video-zRbsm3e2ltw-1507777044.json.gz +0 -0
- tests/test_extract.py +15 -1
pytube/__main__.py
CHANGED
@@ -79,8 +79,7 @@ class YouTube:
|
|
79 |
self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
|
80 |
self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"
|
81 |
|
82 |
-
# Shared between all instances of `Stream`
|
83 |
-
# (Borg pattern). Boooooo.
|
84 |
self.stream_monostate = Monostate(
|
85 |
on_progress=on_progress_callback, on_complete=on_complete_callback
|
86 |
)
|
@@ -139,7 +138,8 @@ class YouTube:
|
|
139 |
self.player_config_args, fmt, self.js # type: ignore
|
140 |
)
|
141 |
except TypeError:
|
142 |
-
|
|
|
143 |
self.js_url = extract.js_url(self.embed_html, self.age_restricted)
|
144 |
self.js = request.get(self.js_url)
|
145 |
assert self.js is not None
|
@@ -167,14 +167,15 @@ class YouTube:
|
|
167 |
|
168 |
"""
|
169 |
self.watch_html = request.get(url=self.watch_url)
|
170 |
-
if (
|
171 |
-
self.watch_html is None
|
172 |
-
or '<img class="icon meh" src="/yts/img' not in self.watch_html
|
173 |
-
):
|
174 |
raise VideoUnavailable(video_id=self.video_id)
|
175 |
-
|
176 |
-
self.embed_html = request.get(url=self.embed_url)
|
177 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
self.vid_info_url = extract.video_info_url(
|
179 |
video_id=self.video_id,
|
180 |
watch_url=self.watch_url,
|
|
|
79 |
self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
|
80 |
self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"
|
81 |
|
82 |
+
# Shared between all instances of `Stream` (Borg pattern).
|
|
|
83 |
self.stream_monostate = Monostate(
|
84 |
on_progress=on_progress_callback, on_complete=on_complete_callback
|
85 |
)
|
|
|
138 |
self.player_config_args, fmt, self.js # type: ignore
|
139 |
)
|
140 |
except TypeError:
|
141 |
+
if not self.embed_html:
|
142 |
+
self.embed_html = request.get(url=self.embed_url)
|
143 |
self.js_url = extract.js_url(self.embed_html, self.age_restricted)
|
144 |
self.js = request.get(self.js_url)
|
145 |
assert self.js is not None
|
|
|
167 |
|
168 |
"""
|
169 |
self.watch_html = request.get(url=self.watch_url)
|
170 |
+
if self.watch_html is None:
|
|
|
|
|
|
|
171 |
raise VideoUnavailable(video_id=self.video_id)
|
|
|
|
|
172 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
|
173 |
+
if not self.age_restricted and 'id="player-unavailable"' in self.watch_html:
|
174 |
+
raise VideoUnavailable(video_id=self.video_id)
|
175 |
+
|
176 |
+
if self.age_restricted:
|
177 |
+
self.embed_html = request.get(url=self.embed_url)
|
178 |
+
|
179 |
self.vid_info_url = extract.video_info_url(
|
180 |
video_id=self.video_id,
|
181 |
watch_url=self.watch_url,
|
pytube/extract.py
CHANGED
@@ -97,7 +97,10 @@ def video_info_url(
|
|
97 |
"""
|
98 |
if age_restricted:
|
99 |
assert embed_html is not None
|
100 |
-
|
|
|
|
|
|
|
101 |
# Here we use ``OrderedDict`` so that the output is consistent between
|
102 |
# Python 2.7+.
|
103 |
eurl = f"https://youtube.googleapis.com/v/{video_id}"
|
|
|
97 |
"""
|
98 |
if age_restricted:
|
99 |
assert embed_html is not None
|
100 |
+
try:
|
101 |
+
sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1)
|
102 |
+
except RegexMatchError:
|
103 |
+
sts = ""
|
104 |
# Here we use ``OrderedDict`` so that the output is consistent between
|
105 |
# Python 2.7+.
|
106 |
eurl = f"https://youtube.googleapis.com/v/{video_id}"
|
tests/conftest.py
CHANGED
@@ -48,7 +48,7 @@ def presigned_video():
|
|
48 |
@pytest.fixture
|
49 |
def age_restricted():
|
50 |
"""Youtube instance initialized with video id zRbsm3e2ltw."""
|
51 |
-
filename = "yt-video-zRbsm3e2ltw-1507777044.json.gz"
|
52 |
return load_playback_file(filename)
|
53 |
|
54 |
|
|
|
48 |
@pytest.fixture
|
49 |
def age_restricted():
|
50 |
"""Youtube instance initialized with video id zRbsm3e2ltw."""
|
51 |
+
filename = "yt-video-irauhITDrsE.json.gz"
|
52 |
return load_playback_file(filename)
|
53 |
|
54 |
|
tests/generate_fixture.py
CHANGED
@@ -19,6 +19,7 @@ output = {
|
|
19 |
"watch_html": yt.watch_html,
|
20 |
"video_info": yt.vid_info,
|
21 |
"js": yt.js,
|
|
|
22 |
}
|
23 |
|
24 |
outpath = path.join(currentdir, "mocks", "yt-video-" + yt.video_id + ".json")
|
|
|
19 |
"watch_html": yt.watch_html,
|
20 |
"video_info": yt.vid_info,
|
21 |
"js": yt.js,
|
22 |
+
"embed_html": yt.embed_html,
|
23 |
}
|
24 |
|
25 |
outpath = path.join(currentdir, "mocks", "yt-video-" + yt.video_id + ".json")
|
tests/mocks/yt-video-irauhITDrsE.json.gz
ADDED
Binary file (38.5 kB). View file
|
|
tests/mocks/yt-video-zRbsm3e2ltw-1507777044.json.gz
DELETED
Binary file (20.6 kB)
|
|
tests/test_extract.py
CHANGED
@@ -12,7 +12,21 @@ def test_extract_video_id():
|
|
12 |
assert video_id == "9bZkp7q19f0"
|
13 |
|
14 |
|
15 |
-
def test_info_url(cipher_signature):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
video_info_url = extract.video_info_url(
|
17 |
video_id=cipher_signature.video_id,
|
18 |
watch_url=cipher_signature.watch_url,
|
|
|
12 |
assert video_id == "9bZkp7q19f0"
|
13 |
|
14 |
|
15 |
+
def test_info_url(age_restricted):
|
16 |
+
video_info_url = extract.video_info_url(
|
17 |
+
video_id="QRS8MkLhQmM",
|
18 |
+
watch_url=age_restricted["url"],
|
19 |
+
embed_html=age_restricted["embed_html"],
|
20 |
+
age_restricted=True,
|
21 |
+
)
|
22 |
+
expected = (
|
23 |
+
"https://youtube.com/get_video_info?video_id=QRS8MkLhQmM&eurl"
|
24 |
+
"=https%3A%2F%2Fyoutube.googleapis.com%2Fv%2FQRS8MkLhQmM&sts="
|
25 |
+
)
|
26 |
+
assert video_info_url == expected
|
27 |
+
|
28 |
+
|
29 |
+
def test_info_url_age_restricted(cipher_signature):
|
30 |
video_info_url = extract.video_info_url(
|
31 |
video_id=cipher_signature.video_id,
|
32 |
watch_url=cipher_signature.watch_url,
|