Taylor Fox Dahlin
committed on
Better unavailable video handling (#791)
Browse files
Added private video and unavailable recording checks in order to provide more descriptive errors.
- pytube/__main__.py +7 -5
- pytube/exceptions.py +22 -0
- pytube/extract.py +38 -0
- tests/conftest.py +14 -0
- tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz +0 -0
- tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz +0 -0
- tests/test_exceptions.py +46 -0
- tests/test_extract.py +16 -0
- tests/test_streams.py +1 -2
pytube/__main__.py
CHANGED
@@ -20,7 +20,9 @@ from pytube import extract
|
|
20 |
from pytube import request
|
21 |
from pytube import Stream
|
22 |
from pytube import StreamQuery
|
|
|
23 |
from pytube.exceptions import VideoUnavailable
|
|
|
24 |
from pytube.extract import apply_descrambler
|
25 |
from pytube.extract import apply_signature
|
26 |
from pytube.extract import get_ytplayer_config
|
@@ -168,11 +170,11 @@ class YouTube:
|
|
168 |
raise VideoUnavailable(video_id=self.video_id)
|
169 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
|
170 |
|
171 |
-
if (
|
172 |
-
|
173 |
-
|
174 |
-
):
|
175 |
-
raise
|
176 |
|
177 |
if self.age_restricted:
|
178 |
if not self.embed_html:
|
|
|
20 |
from pytube import request
|
21 |
from pytube import Stream
|
22 |
from pytube import StreamQuery
|
23 |
+
from pytube.exceptions import RecordingUnavailable
|
24 |
from pytube.exceptions import VideoUnavailable
|
25 |
+
from pytube.exceptions import VideoPrivate
|
26 |
from pytube.extract import apply_descrambler
|
27 |
from pytube.extract import apply_signature
|
28 |
from pytube.extract import get_ytplayer_config
|
|
|
170 |
raise VideoUnavailable(video_id=self.video_id)
|
171 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
|
172 |
|
173 |
+
if extract.is_private(self.watch_html):
|
174 |
+
raise VideoPrivate(video_id=self.video_id)
|
175 |
+
|
176 |
+
if not extract.recording_available(self.watch_html):
|
177 |
+
raise RecordingUnavailable(video_id=self.video_id)
|
178 |
|
179 |
if self.age_restricted:
|
180 |
if not self.embed_html:
|
pytube/exceptions.py
CHANGED
@@ -58,5 +58,27 @@ class VideoUnavailable(PytubeError):
|
|
58 |
self.video_id = video_id
|
59 |
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
class HTMLParseError(PytubeError):
|
62 |
"""HTML could not be parsed"""
|
|
|
58 |
self.video_id = video_id
|
59 |
|
60 |
|
61 |
+
class VideoPrivate(ExtractError):
|
62 |
+
def __init__(self, video_id: str):
|
63 |
+
"""
|
64 |
+
:param str video_id:
|
65 |
+
A YouTube video identifier.
|
66 |
+
"""
|
67 |
+
super().__init__('%s is a private video' % video_id)
|
68 |
+
self.video_id = video_id
|
69 |
+
|
70 |
+
|
71 |
+
class RecordingUnavailable(ExtractError):
|
72 |
+
def __init__(self, video_id: str):
|
73 |
+
"""
|
74 |
+
:param str video_id:
|
75 |
+
A YouTube video identifier.
|
76 |
+
"""
|
77 |
+
super().__init__(
|
78 |
+
'%s does not have a live stream recording available' % video_id
|
79 |
+
)
|
80 |
+
self.video_id = video_id
|
81 |
+
|
82 |
+
|
83 |
class HTMLParseError(PytubeError):
|
84 |
"""HTML could not be parsed"""
|
pytube/extract.py
CHANGED
@@ -22,6 +22,44 @@ from pytube.helpers import regex_search
|
|
22 |
logger = logging.getLogger(__name__)
|
23 |
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def is_age_restricted(watch_html: str) -> bool:
|
26 |
"""Check if content is age restricted.
|
27 |
|
|
|
22 |
logger = logging.getLogger(__name__)
|
23 |
|
24 |
|
25 |
+
def recording_available(watch_html):
|
26 |
+
"""Check if live stream recording is available.
|
27 |
+
|
28 |
+
:param str watch_html:
|
29 |
+
The html contents of the watch page.
|
30 |
+
:rtype: bool
|
31 |
+
:returns:
|
32 |
+
Whether or not the live stream recording is available.
|
33 |
+
"""
|
34 |
+
unavailable_strings = [
|
35 |
+
'This live stream recording is not available.'
|
36 |
+
]
|
37 |
+
for string in unavailable_strings:
|
38 |
+
if string in watch_html:
|
39 |
+
return False
|
40 |
+
return True
|
41 |
+
|
42 |
+
|
43 |
+
def is_private(watch_html):
|
44 |
+
"""Check if content is private.
|
45 |
+
|
46 |
+
:param str watch_html:
|
47 |
+
The html contents of the watch page.
|
48 |
+
:rtype: bool
|
49 |
+
:returns:
|
50 |
+
Whether or not the content is private.
|
51 |
+
"""
|
52 |
+
private_strings = [
|
53 |
+
"This is a private video. Please sign in to verify that you may see it.",
|
54 |
+
"\"simpleText\":\"Private video\"",
|
55 |
+
"This video is private."
|
56 |
+
]
|
57 |
+
for string in private_strings:
|
58 |
+
if string in watch_html:
|
59 |
+
return True
|
60 |
+
return False
|
61 |
+
|
62 |
+
|
63 |
def is_age_restricted(watch_html: str) -> bool:
|
64 |
"""Check if content is age restricted.
|
65 |
|
tests/conftest.py
CHANGED
@@ -58,6 +58,20 @@ def age_restricted():
|
|
58 |
return load_playback_file(filename)
|
59 |
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
@pytest.fixture
|
62 |
def playlist_html():
|
63 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
|
|
58 |
return load_playback_file(filename)
|
59 |
|
60 |
|
61 |
+
@pytest.fixture
|
62 |
+
def private():
|
63 |
+
"""Youtube instance initialized with video id m8uHb5jIGN8."""
|
64 |
+
filename = "yt-video-m8uHb5jIGN8-html.json.gz"
|
65 |
+
return load_playback_file(filename)
|
66 |
+
|
67 |
+
|
68 |
+
@pytest.fixture
|
69 |
+
def missing_recording():
|
70 |
+
"""Youtube instance initialized with video id 5YceQ8YqYMc."""
|
71 |
+
filename = "yt-video-5YceQ8YqYMc-html.json.gz"
|
72 |
+
return load_playback_file(filename)
|
73 |
+
|
74 |
+
|
75 |
@pytest.fixture
|
76 |
def playlist_html():
|
77 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz
ADDED
Binary file (529 kB). View file
|
|
tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz
ADDED
Binary file (74.5 kB). View file
|
|
tests/test_exceptions.py
CHANGED
@@ -1,7 +1,13 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
2 |
from pytube.exceptions import LiveStreamError
|
|
|
3 |
from pytube.exceptions import RegexMatchError
|
4 |
from pytube.exceptions import VideoUnavailable
|
|
|
5 |
|
6 |
|
7 |
def test_video_unavailable():
|
@@ -25,3 +31,43 @@ def test_live_stream_error():
|
|
25 |
except LiveStreamError as e:
|
26 |
assert e.video_id == "YLnZklYFe7E" # noqa: PT017
|
27 |
assert str(e) == "YLnZklYFe7E is streaming live and cannot be loaded"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
import pytest
|
3 |
+
from unittest import mock
|
4 |
+
|
5 |
+
from pytube import YouTube
|
6 |
from pytube.exceptions import LiveStreamError
|
7 |
+
from pytube.exceptions import RecordingUnavailable
|
8 |
from pytube.exceptions import RegexMatchError
|
9 |
from pytube.exceptions import VideoUnavailable
|
10 |
+
from pytube.exceptions import VideoPrivate
|
11 |
|
12 |
|
13 |
def test_video_unavailable():
|
|
|
31 |
except LiveStreamError as e:
|
32 |
assert e.video_id == "YLnZklYFe7E" # noqa: PT017
|
33 |
assert str(e) == "YLnZklYFe7E is streaming live and cannot be loaded"
|
34 |
+
|
35 |
+
|
36 |
+
def test_recording_unavailable():
|
37 |
+
try:
|
38 |
+
raise RecordingUnavailable(video_id="5YceQ8YqYMc")
|
39 |
+
except RecordingUnavailable as e:
|
40 |
+
assert e.video_id == "5YceQ8YqYMc" # noqa: PT017
|
41 |
+
assert str(e) == "5YceQ8YqYMc does not have a live stream recording available"
|
42 |
+
|
43 |
+
|
44 |
+
def test_private_error():
|
45 |
+
try:
|
46 |
+
raise VideoPrivate('mRe-514tGMg')
|
47 |
+
except VideoPrivate as e:
|
48 |
+
assert e.video_id == 'mRe-514tGMg' # noqa: PT017
|
49 |
+
assert str(e) == 'mRe-514tGMg is a private video'
|
50 |
+
|
51 |
+
|
52 |
+
def test_raises_video_private(private):
|
53 |
+
with mock.patch('pytube.request.urlopen') as mock_url_open:
|
54 |
+
# Mock the responses to YouTube
|
55 |
+
mock_url_open_object = mock.Mock()
|
56 |
+
mock_url_open_object.read.side_effect = [
|
57 |
+
private['watch_html'].encode('utf-8'),
|
58 |
+
]
|
59 |
+
mock_url_open.return_value = mock_url_open_object
|
60 |
+
with pytest.raises(VideoPrivate):
|
61 |
+
YouTube('https://youtube.com/watch?v=mRe-514tGMg')
|
62 |
+
|
63 |
+
|
64 |
+
def test_raises_recording_unavailable(missing_recording):
|
65 |
+
with mock.patch('pytube.request.urlopen') as mock_url_open:
|
66 |
+
# Mock the responses to YouTube
|
67 |
+
mock_url_open_object = mock.Mock()
|
68 |
+
mock_url_open_object.read.side_effect = [
|
69 |
+
missing_recording['watch_html'].encode('utf-8'),
|
70 |
+
]
|
71 |
+
mock_url_open.return_value = mock_url_open_object
|
72 |
+
with pytest.raises(RecordingUnavailable):
|
73 |
+
YouTube('https://youtube.com/watch?v=5YceQ8YqYMc')
|
tests/test_extract.py
CHANGED
@@ -52,6 +52,22 @@ def test_non_age_restricted(cipher_signature):
|
|
52 |
assert not extract.is_age_restricted(cipher_signature.watch_html)
|
53 |
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def test_mime_type_codec():
|
56 |
mime_type, mime_subtype = extract.mime_type_codec(
|
57 |
'audio/webm; codecs="opus"'
|
|
|
52 |
assert not extract.is_age_restricted(cipher_signature.watch_html)
|
53 |
|
54 |
|
55 |
+
def test_is_private(private):
|
56 |
+
assert extract.is_private(private['watch_html'])
|
57 |
+
|
58 |
+
|
59 |
+
def test_not_is_private(cipher_signature):
|
60 |
+
assert not extract.is_private(cipher_signature.watch_html)
|
61 |
+
|
62 |
+
|
63 |
+
def test_recording_available(cipher_signature):
|
64 |
+
assert extract.recording_available(cipher_signature.watch_html)
|
65 |
+
|
66 |
+
|
67 |
+
def test_not_recording_available(missing_recording):
|
68 |
+
assert not extract.recording_available(missing_recording['watch_html'])
|
69 |
+
|
70 |
+
|
71 |
def test_mime_type_codec():
|
72 |
mime_type, mime_subtype = extract.mime_type_codec(
|
73 |
'audio/webm; codecs="opus"'
|
tests/test_streams.py
CHANGED
@@ -52,8 +52,7 @@ def test_default_filename(cipher_signature):
|
|
52 |
|
53 |
|
54 |
def test_title(cipher_signature):
|
55 |
-
expected = "
|
56 |
-
cipher_signature.player_response = {"videoDetails": {"title": expected}}
|
57 |
assert cipher_signature.title == expected
|
58 |
|
59 |
|
|
|
52 |
|
53 |
|
54 |
def test_title(cipher_signature):
|
55 |
+
expected = "YouTube Rewind 2019: For the Record | #YouTubeRewind"
|
|
|
56 |
assert cipher_signature.title == expected
|
57 |
|
58 |
|