nficano committed on
Commit
8c5dda0
·
unverified ·
2 Parent(s): 3e8ab2b 3640c7f

Merge pull request #230 from billd100/fix/age-restricted-videos-without-signature

Browse files
Files changed (3) hide show
  1. pytube/__main__.py +9 -4
  2. pytube/extract.py +12 -5
  3. pytube/mixins.py +7 -2
pytube/__main__.py CHANGED
@@ -109,7 +109,7 @@ class YouTube(object):
109
  self.player_config_args = self.vid_info
110
  else:
111
  self.player_config_args = extract.get_ytplayer_config(
112
- self.watch_html,
113
  )['args']
114
 
115
  # https://github.com/nficano/pytube/issues/165
@@ -123,8 +123,13 @@ class YouTube(object):
123
  mixins.apply_descrambler(self.vid_info, fmt)
124
  mixins.apply_descrambler(self.player_config_args, fmt)
125
 
126
- # apply the signature to the download url.
127
- mixins.apply_signature(self.player_config_args, fmt, self.js)
 
 
 
 
 
128
 
129
  # build instances of :class:`Stream <Stream>`
130
  self.initialize_stream_objects(fmt)
@@ -157,7 +162,7 @@ class YouTube(object):
157
  )
158
  self.vid_info = request.get(self.vid_info_url)
159
  if not self.age_restricted:
160
- self.js_url = extract.js_url(self.watch_html)
161
  self.js = request.get(self.js_url)
162
 
163
  def initialize_stream_objects(self, fmt):
 
109
  self.player_config_args = self.vid_info
110
  else:
111
  self.player_config_args = extract.get_ytplayer_config(
112
+ self.watch_html
113
  )['args']
114
 
115
  # https://github.com/nficano/pytube/issues/165
 
123
  mixins.apply_descrambler(self.vid_info, fmt)
124
  mixins.apply_descrambler(self.player_config_args, fmt)
125
 
126
+ try:
127
+ mixins.apply_signature(self.player_config_args, fmt, self.js)
128
+ except TypeError:
129
+ self.js_url = extract.js_url(
130
+ self.embed_html, self.age_restricted)
131
+ self.js = request.get(self.js_url)
132
+ mixins.apply_signature(self.player_config_args, fmt, self.js)
133
 
134
  # build instances of :class:`Stream <Stream>`
135
  self.initialize_stream_objects(fmt)
 
162
  )
163
  self.vid_info = request.get(self.vid_info_url)
164
  if not self.age_restricted:
165
+ self.js_url = extract.js_url(self.watch_html, self.age_restricted)
166
  self.js = request.get(self.js_url)
167
 
168
  def initialize_stream_objects(self, fmt):
pytube/extract.py CHANGED
@@ -111,7 +111,7 @@ def video_info_url(
111
  return 'https://youtube.com/get_video_info?' + urlencode(params)
112
 
113
 
114
- def js_url(watch_html):
115
  """Get the base JavaScript url.
116
 
117
  Construct the base JavaScript url, which contains the decipher
@@ -119,9 +119,11 @@ def js_url(watch_html):
119
 
120
  :param str watch_html:
121
  The html contents of the watch page.
 
 
122
 
123
  """
124
- ytplayer_config = get_ytplayer_config(watch_html)
125
  base_js = ytplayer_config['assets']['js']
126
  return 'https://youtube.com' + base_js
127
 
@@ -150,7 +152,7 @@ def mime_type_codec(mime_type_codec):
150
  return mime_type, [c.strip() for c in codecs.split(',')]
151
 
152
 
153
- def get_ytplayer_config(watch_html):
154
  """Get the YouTube player configuration data from the watch html.
155
 
156
  Extract the ``ytplayer_config``, which is json data embedded within the
@@ -159,10 +161,15 @@ def get_ytplayer_config(watch_html):
159
 
160
  :param str watch_html:
161
  The html contents of the watch page.
 
 
162
  :rtype: str
163
  :returns:
164
  Substring of the html containing the encoded manifest data.
165
  """
166
- pattern = r';ytplayer\.config\s*=\s*({.*?});'
167
- yt_player_config = regex_search(pattern, watch_html, group=1)
 
 
 
168
  return json.loads(yt_player_config)
 
111
  return 'https://youtube.com/get_video_info?' + urlencode(params)
112
 
113
 
114
+ def js_url(html, age_restricted=False):
115
  """Get the base JavaScript url.
116
 
117
  Construct the base JavaScript url, which contains the decipher
 
119
 
120
  :param str watch_html:
121
  The html contents of the watch page.
122
+ :param bool age_restricted:
123
+ Is video age restricted.
124
 
125
  """
126
+ ytplayer_config = get_ytplayer_config(html, age_restricted)
127
  base_js = ytplayer_config['assets']['js']
128
  return 'https://youtube.com' + base_js
129
 
 
152
  return mime_type, [c.strip() for c in codecs.split(',')]
153
 
154
 
155
+ def get_ytplayer_config(html, age_restricted=False):
156
  """Get the YouTube player configuration data from the watch html.
157
 
158
  Extract the ``ytplayer_config``, which is json data embedded within the
 
161
 
162
  :param str watch_html:
163
  The html contents of the watch page.
164
+ :param bool age_restricted:
165
+ Is video age restricted.
166
  :rtype: str
167
  :returns:
168
  Substring of the html containing the encoded manifest data.
169
  """
170
+ if age_restricted:
171
+ pattern = r";yt\.setConfig\(\{'PLAYER_CONFIG':\s*({.*})(,'EXPERIMENT_FLAGS'|;)" # noqa: E501
172
+ else:
173
+ pattern = r';ytplayer\.config\s*=\s*({.*?});'
174
+ yt_player_config = regex_search(pattern, html, group=1)
175
  return json.loads(yt_player_config)
pytube/mixins.py CHANGED
@@ -33,11 +33,16 @@ def apply_signature(config_args, fmt, js):
33
  if 'signature=' in url:
34
  # For certain videos, YouTube will just provide them pre-signed, in
35
  # which case there's no real magic to download them and we can skip
36
- # the whole signature decrambling entirely.
37
  logger.debug('signature found, skip decipher')
38
  continue
39
 
40
- signature = cipher.get_signature(js, stream['s'])
 
 
 
 
 
41
 
42
  logger.debug(
43
  'finished descrambling signature for itag=%s\n%s',
 
33
  if 'signature=' in url:
34
  # For certain videos, YouTube will just provide them pre-signed, in
35
  # which case there's no real magic to download them and we can skip
36
+ # the whole signature descrambling entirely.
37
  logger.debug('signature found, skip decipher')
38
  continue
39
 
40
+ if js is not None:
41
+ signature = cipher.get_signature(js, stream['s'])
42
+ else:
43
+ # signature not present in url (line 33), need js to descramble
44
+ # TypeError caught in __main__
45
+ raise TypeError('JS is None')
46
 
47
  logger.debug(
48
  'finished descrambling signature for itag=%s\n%s',