nficano committed on
Commit
98c7ebd
·
2 Parent(s): 9d41628 52e59d1

Merge branch 'KeluDiao-master'

Browse files

* KeluDiao-master:
pep8/cleanup
Changed the conditioning
No bug in previous version
test travis ci
Delete batch.py
Fixed a bug
Create batch.py
Added python 3 compatibility

Files changed (1) hide show
  1. pytube/api.py +40 -21
pytube/api.py CHANGED
@@ -1,6 +1,5 @@
1
  #!/usr/bin/env python
2
  # -*- coding: utf-8 -*-
3
-
4
  import json
5
  import logging
6
  import re
@@ -38,13 +37,13 @@ YT_QUALITY_PROFILES = {
38
 
39
  # The keys corresponding to the quality/codec map above.
40
  YT_QUALITY_PROFILE_KEYS = (
41
- 'extension',
42
- 'resolution',
43
- 'video_codec',
44
- 'profile',
45
- 'video_bitrate',
46
- 'audio_codec',
47
- 'audio_bitrate'
48
  )
49
 
50
 
@@ -85,9 +84,9 @@ class YouTube(object):
85
  def video_id(self):
86
  """Gets the video id by parsing and extracting it from the url."""
87
  parts = urlparse(self._video_url)
88
- qs = getattr(parts, 'query')
89
  if qs:
90
- video_id = parse_qs(qs).get('v')
91
  if video_id:
92
  return video_id.pop()
93
 
@@ -183,8 +182,8 @@ class YouTube(object):
183
  # Check if we have the signature, otherwise we'll need to get the
184
  # cipher from the js.
185
  if "signature=" not in url:
186
- log.debug('signature not in url, attempting to resolve the '
187
- 'cipher...')
188
  signature = self._get_cipher(stream_map["s"][idx], js_url)
189
  url = "{0}&signature={1}".format(url, signature)
190
  self._add_video(url, self.filename, **quality_profile)
@@ -255,7 +254,12 @@ class YouTube(object):
255
  raise PytubeError("Unable to open url: {0}".format(self.url))
256
 
257
  html = response.read()
258
- if "og:restrictions:age" in html:
 
 
 
 
 
259
  raise AgeRestricted("Age restricted video. Unable to download "
260
  "without being signed in.")
261
 
@@ -266,7 +270,7 @@ class YouTube(object):
266
  # do this just so we just can return one object for the video data.
267
  encoded_stream_map = json_object.get("args", {}).get(
268
  "url_encoded_fmt_stream_map")
269
- json_object['args']['stream_map'] = self._parse_stream_map(
270
  encoded_stream_map)
271
  return json_object
272
 
@@ -298,7 +302,7 @@ class YouTube(object):
298
  for kv in video:
299
  key, value = kv.split("=")
300
  dct.get(key, []).append(unquote(value))
301
- log.debug('decoded stream map: %s', dct)
302
  return dct
303
 
304
  def _get_json_data(self, html):
@@ -308,13 +312,26 @@ class YouTube(object):
308
  The raw html of the page.
309
  """
310
  # 18 represents the length of "ytplayer.config = ".
311
- start = html.find("ytplayer.config = ") + 18
 
 
 
 
 
 
 
 
312
  html = html[start:]
313
 
314
  offset = self._get_json_offset(html)
315
  if not offset:
316
  raise PytubeError("Unable to extract json.")
317
- return json.loads(html[:offset])
 
 
 
 
 
318
 
319
  def _get_json_offset(self, html):
320
  """Find where the json object starts.
@@ -322,16 +339,18 @@ class YouTube(object):
322
  :param str html:
323
  The raw html of the YouTube page.
324
  """
325
- brackets = []
326
  index = 1
327
  # Determine the offset by pushing/popping brackets until all
328
  # js expressions are closed.
329
  for idx, ch in enumerate(html):
 
 
330
  if ch == "{":
331
- brackets.append("}")
332
  elif ch == "}":
333
- brackets.pop()
334
- if len(brackets) == 0:
335
  break
336
  else:
337
  raise PytubeError("Unable to determine json offset.")
 
1
  #!/usr/bin/env python
2
  # -*- coding: utf-8 -*-
 
3
  import json
4
  import logging
5
  import re
 
37
 
38
  # The keys corresponding to the quality/codec map above.
39
  YT_QUALITY_PROFILE_KEYS = (
40
+ "extension",
41
+ "resolution",
42
+ "video_codec",
43
+ "profile",
44
+ "video_bitrate",
45
+ "audio_codec",
46
+ "audio_bitrate"
47
  )
48
 
49
 
 
84
  def video_id(self):
85
  """Gets the video id by parsing and extracting it from the url."""
86
  parts = urlparse(self._video_url)
87
+ qs = getattr(parts, "query")
88
  if qs:
89
+ video_id = parse_qs(qs).get("v")
90
  if video_id:
91
  return video_id.pop()
92
 
 
182
  # Check if we have the signature, otherwise we'll need to get the
183
  # cipher from the js.
184
  if "signature=" not in url:
185
+ log.debug("signature not in url, attempting to resolve the "
186
+ "cipher.")
187
  signature = self._get_cipher(stream_map["s"][idx], js_url)
188
  url = "{0}&signature={1}".format(url, signature)
189
  self._add_video(url, self.filename, **quality_profile)
 
254
  raise PytubeError("Unable to open url: {0}".format(self.url))
255
 
256
  html = response.read()
257
+ if isinstance(html, str):
258
+ restriction_pattern = "og:restrictions:age"
259
+ else:
260
+ restriction_pattern = bytes("og:restrictions:age", "utf-8")
261
+
262
+ if restriction_pattern in html:
263
  raise AgeRestricted("Age restricted video. Unable to download "
264
  "without being signed in.")
265
 
 
270
  # do this just so we just can return one object for the video data.
271
  encoded_stream_map = json_object.get("args", {}).get(
272
  "url_encoded_fmt_stream_map")
273
+ json_object["args"]["stream_map"] = self._parse_stream_map(
274
  encoded_stream_map)
275
  return json_object
276
 
 
302
  for kv in video:
303
  key, value = kv.split("=")
304
  dct.get(key, []).append(unquote(value))
305
+ log.debug("decoded stream map: %s", dct)
306
  return dct
307
 
308
  def _get_json_data(self, html):
 
312
  The raw html of the page.
313
  """
314
  # 18 represents the length of "ytplayer.config = ".
315
+ if isinstance(html, str):
316
+ json_start_pattern = "ytplayer.config = "
317
+ else:
318
+ json_start_pattern = bytes("ytplayer.config = ", "utf-8")
319
+ pattern_idx = html.find(json_start_pattern)
320
+ # In case video is unable to play
321
+ if(pattern_idx == -1):
322
+ raise PytubeError("Unable to find start pattern.")
323
+ start = pattern_idx + 18
324
  html = html[start:]
325
 
326
  offset = self._get_json_offset(html)
327
  if not offset:
328
  raise PytubeError("Unable to extract json.")
329
+ if isinstance(html, str):
330
+ json_content = json.loads(html[:offset])
331
+ else:
332
+ json_content = json.loads(html[:offset].decode("utf-8"))
333
+
334
+ return json_content
335
 
336
  def _get_json_offset(self, html):
337
  """Find where the json object starts.
 
339
  :param str html:
340
  The raw html of the YouTube page.
341
  """
342
+ unmatched_brackets_num = 0
343
  index = 1
344
  # Determine the offset by pushing/popping brackets until all
345
  # js expressions are closed.
346
  for idx, ch in enumerate(html):
347
+ if isinstance(ch, int):
348
+ ch = chr(ch)
349
  if ch == "{":
350
+ unmatched_brackets_num += 1
351
  elif ch == "}":
352
+ unmatched_brackets_num -= 1
353
+ if unmatched_brackets_num == 0:
354
  break
355
  else:
356
  raise PytubeError("Unable to determine json offset.")