hbmartin committed on
Commit
2d0e32e
·
unverified ·
2 Parent(s): c3e2bd1 ffde4a3

Merge pull request #64 from hbmartin/range-tuning

Browse files

Better range-stream defaults; avoid an unnecessary HEAD call

pytube/request.py CHANGED
@@ -1,22 +1,26 @@
1
  # -*- coding: utf-8 -*-
2
 
3
  """Implements a simple wrapper around urlopen."""
 
4
  from functools import lru_cache
5
- from typing import Any, Iterable, Dict, Optional
 
6
  from urllib.request import Request
7
  from urllib.request import urlopen
8
 
 
 
9
 
10
  def _execute_request(
11
  url: str, method: Optional[str] = None, headers: Optional[Dict[str, str]] = None
12
- ) -> Any:
13
  base_headers = {"User-Agent": "Mozilla/5.0"}
14
  if headers:
15
  base_headers.update(headers)
16
  if url.lower().startswith("http"):
17
  request = Request(url, headers=base_headers, method=method)
18
  else:
19
- raise ValueError
20
  return urlopen(request) # nosec
21
 
22
 
@@ -33,20 +37,26 @@ def get(url) -> str:
33
 
34
 
35
  def stream(
36
- url: str, chunk_size: int = 1024, range_size: int = 10485760
37
  ) -> Iterable[bytes]:
38
  """Read the response in chunks.
39
  :param str url: The URL to perform the GET request for.
40
- :param int chunk_size: The size in bytes of each chunk. Defaults to 1KB
41
- :param int range_size: The size in bytes of each range request. Defaults to 10MB
42
  :rtype: Iterable[bytes]
43
  """
44
- file_size: int = filesize(url)
45
  downloaded = 0
46
  while downloaded < file_size:
47
  stop_pos = min(downloaded + range_size, file_size) - 1
48
  range_header = f"bytes={downloaded}-{stop_pos}"
49
  response = _execute_request(url, method="GET", headers={"Range": range_header})
 
 
 
 
 
 
50
  while True:
51
  chunk = response.read(chunk_size)
52
  if not chunk:
 
1
  # -*- coding: utf-8 -*-
2
 
3
  """Implements a simple wrapper around urlopen."""
4
+ import logging
5
  from functools import lru_cache
6
+ from http.client import HTTPResponse
7
+ from typing import Iterable, Dict, Optional
8
  from urllib.request import Request
9
  from urllib.request import urlopen
10
 
11
+ logger = logging.getLogger(__name__)
12
+
13
 
14
  def _execute_request(
15
  url: str, method: Optional[str] = None, headers: Optional[Dict[str, str]] = None
16
+ ) -> HTTPResponse:
17
  base_headers = {"User-Agent": "Mozilla/5.0"}
18
  if headers:
19
  base_headers.update(headers)
20
  if url.lower().startswith("http"):
21
  request = Request(url, headers=base_headers, method=method)
22
  else:
23
+ raise ValueError("Invalid URL")
24
  return urlopen(request) # nosec
25
 
26
 
 
37
 
38
 
39
  def stream(
40
+ url: str, chunk_size: int = 4096, range_size: int = 9437184
41
  ) -> Iterable[bytes]:
42
  """Read the response in chunks.
43
  :param str url: The URL to perform the GET request for.
44
+ :param int chunk_size: The size in bytes of each chunk. Defaults to 4KB
45
+ :param int range_size: The size in bytes of each range request. Defaults to 9MB
46
  :rtype: Iterable[bytes]
47
  """
48
+ file_size: int = range_size # fake filesize to start
49
  downloaded = 0
50
  while downloaded < file_size:
51
  stop_pos = min(downloaded + range_size, file_size) - 1
52
  range_header = f"bytes={downloaded}-{stop_pos}"
53
  response = _execute_request(url, method="GET", headers={"Range": range_header})
54
+ if file_size == range_size:
55
+ try:
56
+ content_range = response.info()["Content-Range"]
57
+ file_size = int(content_range.split("/")[1])
58
+ except (KeyError, IndexError, ValueError) as e:
59
+ logger.error(e)
60
  while True:
61
  chunk = response.read(chunk_size)
62
  if not chunk:
pytube/streams.py CHANGED
@@ -9,9 +9,11 @@ has been renamed to accommodate DASH (which serves the audio and video
9
  separately).
10
  """
11
 
 
12
  import logging
13
  import os
14
  from typing import Dict, Tuple, Optional, BinaryIO
 
15
 
16
  from pytube import extract
17
  from pytube import request
@@ -169,6 +171,11 @@ class Stream:
169
 
170
  return self.filesize
171
 
 
 
 
 
 
172
  @property
173
  def default_filename(self) -> str:
174
  """Generate filename based on the video title.
 
9
  separately).
10
  """
11
 
12
+ from datetime import datetime
13
  import logging
14
  import os
15
  from typing import Dict, Tuple, Optional, BinaryIO
16
+ from urllib.parse import parse_qs
17
 
18
  from pytube import extract
19
  from pytube import request
 
171
 
172
  return self.filesize
173
 
174
+ @property
175
+ def expiration(self) -> datetime:
176
+ expire = parse_qs(self.url.split("?")[1])["expire"][0]
177
+ return datetime.utcfromtimestamp(int(expire))
178
+
179
  @property
180
  def default_filename(self) -> str:
181
  """Generate filename based on the video title.
tests/test_request.py CHANGED
@@ -7,9 +7,8 @@ import pytest
7
  from pytube import request
8
 
9
 
10
- @mock.patch("pytube.request.filesize", return_value=3 * 8 * 1024)
11
  @mock.patch("pytube.request.urlopen")
12
- def test_streaming(mock_urlopen, filesize):
13
  # Given
14
  fake_stream_binary = [
15
  os.urandom(8 * 1024),
@@ -19,6 +18,7 @@ def test_streaming(mock_urlopen, filesize):
19
  ]
20
  response = mock.Mock()
21
  response.read.side_effect = fake_stream_binary
 
22
  mock_urlopen.return_value = response
23
  # When
24
  response = request.stream("http://fakeassurl.gov")
 
7
  from pytube import request
8
 
9
 
 
10
  @mock.patch("pytube.request.urlopen")
11
+ def test_streaming(mock_urlopen):
12
  # Given
13
  fake_stream_binary = [
14
  os.urandom(8 * 1024),
 
18
  ]
19
  response = mock.Mock()
20
  response.read.side_effect = fake_stream_binary
21
+ response.info.return_value = {"Content-Range": "bytes 200-1000/24576"}
22
  mock_urlopen.return_value = response
23
  # When
24
  response = request.stream("http://fakeassurl.gov")
tests/test_streams.py CHANGED
@@ -1,6 +1,7 @@
1
  # -*- coding: utf-8 -*-
2
  import os
3
  import random
 
4
  from unittest import mock
5
  from unittest.mock import MagicMock
6
 
@@ -59,6 +60,10 @@ def test_title(cipher_signature):
59
  assert cipher_signature.title == expected
60
 
61
 
 
 
 
 
62
  def test_caption_tracks(presigned_video):
63
  assert len(presigned_video.caption_tracks) == 13
64
 
 
1
  # -*- coding: utf-8 -*-
2
  import os
3
  import random
4
+ from datetime import datetime
5
  from unittest import mock
6
  from unittest.mock import MagicMock
7
 
 
60
  assert cipher_signature.title == expected
61
 
62
 
63
+ def test_expiration(cipher_signature):
64
+ assert cipher_signature.streams[0].expiration == datetime(2020, 1, 16, 5, 12, 5)
65
+
66
+
67
  def test_caption_tracks(presigned_video):
68
  assert len(presigned_video.caption_tracks) == 13
69