Taylor Fox Dahlin committed on
Commit
eec6f64
·
unverified ·
1 Parent(s): 8464dc1

Fix for #763 (#767)

Browse files

* Fix KeyError: 'assets', and minor refactor
* Fixed tests. Changes to YT broke a single test, and updating the mock had cascading effects on other tests. This updates the tests to reflect a more recent YT page, and introduces os-agnostic file paths for testing on windows.
* Fixed flake8 issues.

pytube/__main__.py CHANGED
@@ -293,7 +293,7 @@ class YouTube:
293
  """
294
  return self.player_response.get("videoDetails", {}).get(
295
  "shortDescription"
296
- ) or extract._get_vid_descr(self.watch_html)
297
 
298
  @property
299
  def rating(self) -> float:
 
293
  """
294
  return self.player_response.get("videoDetails", {}).get(
295
  "shortDescription"
296
+ )
297
 
298
  @property
299
  def rating(self) -> float:
pytube/extract.py CHANGED
@@ -4,11 +4,9 @@ import json
4
  import logging
5
  import re
6
  from collections import OrderedDict
7
- from html.parser import HTMLParser
8
  from typing import Any
9
  from typing import Dict
10
  from typing import List
11
- from typing import Optional
12
  from typing import Tuple
13
  from urllib.parse import parse_qs
14
  from urllib.parse import parse_qsl
@@ -17,7 +15,6 @@ from urllib.parse import unquote
17
  from urllib.parse import urlencode
18
 
19
  from pytube.cipher import Cipher
20
- from pytube.exceptions import HTMLParseError
21
  from pytube.exceptions import LiveStreamError
22
  from pytube.exceptions import RegexMatchError
23
  from pytube.helpers import regex_search
@@ -25,36 +22,6 @@ from pytube.helpers import regex_search
25
  logger = logging.getLogger(__name__)
26
 
27
 
28
- class PytubeHTMLParser(HTMLParser):
29
- in_vid_descr = False
30
- in_vid_descr_br = False
31
- vid_descr = ""
32
-
33
- def handle_starttag(self, tag, attrs):
34
- if tag == "p":
35
- for attr in attrs:
36
- if attr[0] == "id" and attr[1] == "eow-description":
37
- self.in_vid_descr = True
38
-
39
- def handle_endtag(self, tag):
40
- if self.in_vid_descr and tag == "p":
41
- self.in_vid_descr = False
42
-
43
- def handle_startendtag(self, tag, attrs):
44
- if self.in_vid_descr and tag == "br":
45
- self.in_vid_descr_br = True
46
-
47
- def handle_data(self, data):
48
- if self.in_vid_descr_br:
49
- self.vid_descr += f"\n{data}"
50
- self.in_vid_descr_br = False
51
- elif self.in_vid_descr:
52
- self.vid_descr += data
53
-
54
- def error(self, message):
55
- raise HTMLParseError(message)
56
-
57
-
58
  def is_age_restricted(watch_html: str) -> bool:
59
  """Check if content is age restricted.
60
 
@@ -151,7 +118,7 @@ def js_url(html: str) -> str:
151
  :param str html:
152
  The html contents of the watch page.
153
  """
154
- base_js = get_ytplayer_config(html)["assets"]["js"]
155
  return "https://youtube.com" + base_js
156
 
157
 
@@ -182,6 +149,31 @@ def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]:
182
  return mime_type, [c.strip() for c in codecs.split(",")]
183
 
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  def get_ytplayer_config(html: str) -> Any:
186
  """Get the YouTube player configuration data from the watch html.
187
 
@@ -215,13 +207,6 @@ def get_ytplayer_config(html: str) -> Any:
215
  )
216
 
217
 
218
- def _get_vid_descr(html: Optional[str]) -> str:
219
- html_parser = PytubeHTMLParser()
220
- if html:
221
- html_parser.feed(html)
222
- return html_parser.vid_descr
223
-
224
-
225
  def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
226
  """Apply the decrypted signature to the stream manifest.
227
 
@@ -316,11 +301,11 @@ def apply_descrambler(stream_data: Dict, key: str) -> None:
316
  except KeyError:
317
  cipher_url = [
318
  parse_qs(
319
- formats[i][
320
  "cipher" if "cipher" in data.keys() else "signatureCipher"
321
  ]
322
  )
323
- for i, data in enumerate(formats)
324
  ]
325
  stream_data[key] = [
326
  {
 
4
  import logging
5
  import re
6
  from collections import OrderedDict
 
7
  from typing import Any
8
  from typing import Dict
9
  from typing import List
 
10
  from typing import Tuple
11
  from urllib.parse import parse_qs
12
  from urllib.parse import parse_qsl
 
15
  from urllib.parse import urlencode
16
 
17
  from pytube.cipher import Cipher
 
18
  from pytube.exceptions import LiveStreamError
19
  from pytube.exceptions import RegexMatchError
20
  from pytube.helpers import regex_search
 
22
  logger = logging.getLogger(__name__)
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def is_age_restricted(watch_html: str) -> bool:
26
  """Check if content is age restricted.
27
 
 
118
  :param str html:
119
  The html contents of the watch page.
120
  """
121
+ base_js = get_ytplayer_js(html)
122
  return "https://youtube.com" + base_js
123
 
124
 
 
149
  return mime_type, [c.strip() for c in codecs.split(",")]
150
 
151
 
152
+ def get_ytplayer_js(html: str) -> Any:
153
+ """Get the YouTube player base JavaScript path.
154
+
155
+ :param str html
156
+ The html contents of the watch page.
157
+ :rtype: str
158
+ :returns:
159
+ Path to YouTube's base.js file.
160
+ """
161
+ js_url_patterns = [
162
+ r"\"jsUrl\":\"([^\"]*)\"",
163
+ ]
164
+ for pattern in js_url_patterns:
165
+ regex = re.compile(pattern)
166
+ function_match = regex.search(html)
167
+ if function_match:
168
+ logger.debug("finished regex search, matched: %s", pattern)
169
+ yt_player_js = function_match.group(1)
170
+ return yt_player_js
171
+
172
+ raise RegexMatchError(
173
+ caller="get_ytplayer_js", pattern="js_url_patterns"
174
+ )
175
+
176
+
177
  def get_ytplayer_config(html: str) -> Any:
178
  """Get the YouTube player configuration data from the watch html.
179
 
 
207
  )
208
 
209
 
 
 
 
 
 
 
 
210
  def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
211
  """Apply the decrypted signature to the stream manifest.
212
 
 
301
  except KeyError:
302
  cipher_url = [
303
  parse_qs(
304
+ data[
305
  "cipher" if "cipher" in data.keys() else "signatureCipher"
306
  ]
307
  )
308
+ for data in formats
309
  ]
310
  stream_data[key] = [
311
  {
tests/mocks/yt-video-9bZkp7q19f0.json.gz CHANGED
Binary files a/tests/mocks/yt-video-9bZkp7q19f0.json.gz and b/tests/mocks/yt-video-9bZkp7q19f0.json.gz differ
 
tests/test_captions.py CHANGED
@@ -1,4 +1,5 @@
1
  # -*- coding: utf-8 -*-
 
2
  from unittest import mock
3
  from unittest.mock import MagicMock
4
  from unittest.mock import mock_open
@@ -71,7 +72,7 @@ def test_download(srt):
71
  )
72
  caption.download("title")
73
  assert (
74
- open_mock.call_args_list[0][0][0].split("/")[-1] == "title (en).srt"
75
  )
76
 
77
 
@@ -89,7 +90,7 @@ def test_download_with_prefix(srt):
89
  )
90
  caption.download("title", filename_prefix="1 ")
91
  assert (
92
- open_mock.call_args_list[0][0][0].split("/")[-1]
93
  == "1 title (en).srt"
94
  )
95
 
@@ -108,7 +109,7 @@ def test_download_with_output_path(srt):
108
  }
109
  )
110
  file_path = caption.download("title", output_path="blah")
111
- assert file_path == "/target/title (en).srt"
112
  captions.target_directory.assert_called_with("blah")
113
 
114
 
@@ -126,7 +127,7 @@ def test_download_xml_and_trim_extension(xml):
126
  )
127
  caption.download("title.xml", srt=False)
128
  assert (
129
- open_mock.call_args_list[0][0][0].split("/")[-1] == "title (en).xml"
130
  )
131
 
132
 
 
1
  # -*- coding: utf-8 -*-
2
+ import os
3
  from unittest import mock
4
  from unittest.mock import MagicMock
5
  from unittest.mock import mock_open
 
72
  )
73
  caption.download("title")
74
  assert (
75
+ open_mock.call_args_list[0][0][0].split(os.path.sep)[-1] == "title (en).srt"
76
  )
77
 
78
 
 
90
  )
91
  caption.download("title", filename_prefix="1 ")
92
  assert (
93
+ open_mock.call_args_list[0][0][0].split(os.path.sep)[-1]
94
  == "1 title (en).srt"
95
  )
96
 
 
109
  }
110
  )
111
  file_path = caption.download("title", output_path="blah")
112
+ assert file_path == os.path.join("/target","title (en).srt")
113
  captions.target_directory.assert_called_with("blah")
114
 
115
 
 
127
  )
128
  caption.download("title.xml", srt=False)
129
  assert (
130
+ open_mock.call_args_list[0][0][0].split(os.path.sep)[-1] == "title (en).xml"
131
  )
132
 
133
 
tests/test_extract.py CHANGED
@@ -38,7 +38,7 @@ def test_info_url_age_restricted(cipher_signature):
38
 
39
  def test_js_url(cipher_signature):
40
  expected = (
41
- "https://youtube.com/yts/jsbin/player_ias-vflWQEEag/en_US/base.js"
42
  )
43
  result = extract.js_url(cipher_signature.watch_html)
44
  assert expected == result
@@ -52,25 +52,6 @@ def test_non_age_restricted(cipher_signature):
52
  assert not extract.is_age_restricted(cipher_signature.watch_html)
53
 
54
 
55
- def test_get_vid_desc(cipher_signature):
56
- expected = (
57
- "PSY - ‘I LUV IT’ M/V @ https://youtu.be/Xvjnoagk6GU\n"
58
- "PSY - ‘New Face’ M/V @https://youtu.be/OwJPPaEyqhI\n"
59
- "PSY - 8TH ALBUM '4X2=8' on iTunes @\n"
60
- "https://smarturl.it/PSY_8thAlbum\n"
61
- "PSY - GANGNAM STYLE(강남스타일) on iTunes @ http://smarturl.it/PsyGangnam\n"
62
- "#PSY #싸이 #GANGNAMSTYLE #강남스타일\n"
63
- "More about PSY@\nhttp://www.youtube.com/officialpsy\n"
64
- "http://www.facebook.com/officialpsy\n"
65
- "http://twitter.com/psy_oppa\n"
66
- "https://www.instagram.com/42psy42\n"
67
- "http://iTunes.com/PSY\n"
68
- "http://sptfy.com/PSY\n"
69
- "http://weibo.com/psyoppa"
70
- )
71
- assert extract._get_vid_descr(cipher_signature.watch_html) == expected
72
-
73
-
74
  def test_mime_type_codec():
75
  mime_type, mime_subtype = extract.mime_type_codec(
76
  'audio/webm; codecs="opus"'
 
38
 
39
  def test_js_url(cipher_signature):
40
  expected = (
41
+ "https://youtube.com/s/player/4a1799bd/player_ias.vflset/en_US/base.js"
42
  )
43
  result = extract.js_url(cipher_signature.watch_html)
44
  assert expected == result
 
52
  assert not extract.is_age_restricted(cipher_signature.watch_html)
53
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def test_mime_type_codec():
56
  mime_type, mime_subtype = extract.mime_type_codec(
57
  'audio/webm; codecs="opus"'
tests/test_helpers.py CHANGED
@@ -1,4 +1,5 @@
1
  # -*- coding: utf-8 -*-
 
2
  from unittest import mock
3
 
4
  import pytest
@@ -61,7 +62,7 @@ def test_cache():
61
  @mock.patch("os.getcwd", return_value="/cwd")
62
  @mock.patch("os.makedirs")
63
  def test_target_directory_with_relative_path(_, __, makedirs): # noqa: PT019
64
- assert target_directory("test") == "/cwd/test"
65
  makedirs.assert_called()
66
 
67
 
 
1
  # -*- coding: utf-8 -*-
2
+ import os
3
  from unittest import mock
4
 
5
  import pytest
 
62
  @mock.patch("os.getcwd", return_value="/cwd")
63
  @mock.patch("os.makedirs")
64
  def test_target_directory_with_relative_path(_, __, makedirs): # noqa: PT019
65
+ assert target_directory("test") == os.path.join("/cwd", "test")
66
  makedirs.assert_called()
67
 
68
 
tests/test_query.py CHANGED
@@ -144,7 +144,7 @@ def test_get_highest_resolution(cipher_signature):
144
  def test_filter_is_dash(cipher_signature):
145
  streams = cipher_signature.streams.filter(is_dash=False)
146
  itags = [s.itag for s in streams]
147
- assert itags == [18, 398, 397, 396, 395, 394]
148
 
149
 
150
  def test_get_audio_only(cipher_signature):
@@ -156,13 +156,13 @@ def test_get_audio_only_with_subtype(cipher_signature):
156
 
157
 
158
  def test_sequence(cipher_signature):
159
- assert len(cipher_signature.streams) == 22
160
  assert cipher_signature.streams[0] is not None
161
 
162
 
163
  def test_otf(cipher_signature):
164
  non_otf = cipher_signature.streams.otf()
165
- assert len(non_otf) == 22
166
 
167
  otf = cipher_signature.streams.otf(True)
168
  assert len(otf) == 0
 
144
  def test_filter_is_dash(cipher_signature):
145
  streams = cipher_signature.streams.filter(is_dash=False)
146
  itags = [s.itag for s in streams]
147
+ assert itags == [18, 399, 398, 397, 396, 395, 394]
148
 
149
 
150
  def test_get_audio_only(cipher_signature):
 
156
 
157
 
158
  def test_sequence(cipher_signature):
159
+ assert len(cipher_signature.streams) == 23
160
  assert cipher_signature.streams[0] is not None
161
 
162
 
163
  def test_otf(cipher_signature):
164
  non_otf = cipher_signature.streams.otf()
165
+ assert len(non_otf) == 23
166
 
167
  otf = cipher_signature.streams.otf(True)
168
  assert len(otf) == 0
tests/test_streams.py CHANGED
@@ -63,7 +63,7 @@ def test_title(cipher_signature):
63
 
64
 
65
  def test_expiration(cipher_signature):
66
- assert cipher_signature.streams[0].expiration == datetime(2020, 1, 16, 5, 12, 5)
67
 
68
 
69
  def test_caption_tracks(presigned_video):
@@ -93,27 +93,9 @@ def test_description(cipher_signature):
93
  )
94
  assert cipher_signature.description == expected
95
 
96
- cipher_signature.player_response = {}
97
- expected = (
98
- "PSY - ‘I LUV IT’ M/V @ https://youtu.be/Xvjnoagk6GU\n"
99
- "PSY - ‘New Face’ M/V @https://youtu.be/OwJPPaEyqhI\n"
100
- "PSY - 8TH ALBUM '4X2=8' on iTunes @\n"
101
- "https://smarturl.it/PSY_8thAlbum\n"
102
- "PSY - GANGNAM STYLE(강남스타일) on iTunes @ http://smarturl.it/PsyGangnam\n"
103
- "#PSY #싸이 #GANGNAMSTYLE #강남스타일\n"
104
- "More about PSY@\nhttp://www.youtube.com/officialpsy\n"
105
- "http://www.facebook.com/officialpsy\n"
106
- "http://twitter.com/psy_oppa\n"
107
- "https://www.instagram.com/42psy42\n"
108
- "http://iTunes.com/PSY\n"
109
- "http://sptfy.com/PSY\n"
110
- "http://weibo.com/psyoppa"
111
- )
112
- assert cipher_signature.description == expected
113
-
114
 
115
  def test_rating(cipher_signature):
116
- assert cipher_signature.rating == 4.522203
117
 
118
 
119
  def test_length(cipher_signature):
@@ -121,7 +103,7 @@ def test_length(cipher_signature):
121
 
122
 
123
  def test_views(cipher_signature):
124
- assert cipher_signature.views == 3494704859
125
 
126
 
127
  @mock.patch(
@@ -149,7 +131,10 @@ def test_download_with_prefix(cipher_signature):
149
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
150
  stream = cipher_signature.streams[0]
151
  file_path = stream.download(filename_prefix="prefix")
152
- assert file_path == "/target/prefixPSY - GANGNAM STYLE(강남스타일) MV.mp4"
 
 
 
153
 
154
 
155
  @mock.patch(
@@ -164,7 +149,10 @@ def test_download_with_filename(cipher_signature):
164
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
165
  stream = cipher_signature.streams[0]
166
  file_path = stream.download(filename="cool name bro")
167
- assert file_path == "/target/cool name bro.mp4"
 
 
 
168
 
169
 
170
  @mock.patch(
@@ -181,7 +169,10 @@ def test_download_with_existing(cipher_signature):
181
  stream = cipher_signature.streams[0]
182
  os.path.getsize = Mock(return_value=stream.filesize)
183
  file_path = stream.download()
184
- assert file_path == "/target/PSY - GANGNAM STYLE(강남스타일) MV.mp4"
 
 
 
185
  assert not request.stream.called
186
 
187
 
@@ -199,7 +190,10 @@ def test_download_with_existing_no_skip(cipher_signature):
199
  stream = cipher_signature.streams[0]
200
  os.path.getsize = Mock(return_value=stream.filesize)
201
  file_path = stream.download(skip_existing=False)
202
- assert file_path == "/target/PSY - GANGNAM STYLE(강남스타일) MV.mp4"
 
 
 
203
  assert request.stream.called
204
 
205
 
 
63
 
64
 
65
  def test_expiration(cipher_signature):
66
+ assert cipher_signature.streams[0].expiration == datetime(2020, 10, 24, 11, 7, 41)
67
 
68
 
69
  def test_caption_tracks(presigned_video):
 
93
  )
94
  assert cipher_signature.description == expected
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  def test_rating(cipher_signature):
98
+ assert cipher_signature.rating == 4.5375643
99
 
100
 
101
  def test_length(cipher_signature):
 
103
 
104
 
105
  def test_views(cipher_signature):
106
+ assert cipher_signature.views == 3830838693
107
 
108
 
109
  @mock.patch(
 
131
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
132
  stream = cipher_signature.streams[0]
133
  file_path = stream.download(filename_prefix="prefix")
134
+ assert file_path == os.path.join(
135
+ "/target",
136
+ "prefixPSY - GANGNAM STYLE(강남스타일) MV.mp4"
137
+ )
138
 
139
 
140
  @mock.patch(
 
149
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
150
  stream = cipher_signature.streams[0]
151
  file_path = stream.download(filename="cool name bro")
152
+ assert file_path == os.path.join(
153
+ "/target",
154
+ "cool name bro.mp4"
155
+ )
156
 
157
 
158
  @mock.patch(
 
169
  stream = cipher_signature.streams[0]
170
  os.path.getsize = Mock(return_value=stream.filesize)
171
  file_path = stream.download()
172
+ assert file_path == os.path.join(
173
+ "/target",
174
+ "PSY - GANGNAM STYLE(강남스타일) MV.mp4"
175
+ )
176
  assert not request.stream.called
177
 
178
 
 
190
  stream = cipher_signature.streams[0]
191
  os.path.getsize = Mock(return_value=stream.filesize)
192
  file_path = stream.download(skip_existing=False)
193
+ assert file_path == os.path.join(
194
+ "/target",
195
+ "PSY - GANGNAM STYLE(강남스타일) MV.mp4"
196
+ )
197
  assert request.stream.called
198
 
199