Merge pull request #64 from hbmartin/range-tuning
Browse files
better range stream defaults, avoid unnecessary head call
- pytube/request.py +17 -7
- pytube/streams.py +7 -0
- tests/test_request.py +2 -2
- tests/test_streams.py +5 -0
pytube/request.py
CHANGED
@@ -1,22 +1,26 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
"""Implements a simple wrapper around urlopen."""
|
|
|
4 |
from functools import lru_cache
|
5 |
-
from
|
|
|
6 |
from urllib.request import Request
|
7 |
from urllib.request import urlopen
|
8 |
|
|
|
|
|
9 |
|
10 |
def _execute_request(
|
11 |
url: str, method: Optional[str] = None, headers: Optional[Dict[str, str]] = None
|
12 |
-
) ->
|
13 |
base_headers = {"User-Agent": "Mozilla/5.0"}
|
14 |
if headers:
|
15 |
base_headers.update(headers)
|
16 |
if url.lower().startswith("http"):
|
17 |
request = Request(url, headers=base_headers, method=method)
|
18 |
else:
|
19 |
-
raise ValueError
|
20 |
return urlopen(request) # nosec
|
21 |
|
22 |
|
@@ -33,20 +37,26 @@ def get(url) -> str:
|
|
33 |
|
34 |
|
35 |
def stream(
|
36 |
-
url: str, chunk_size: int =
|
37 |
) -> Iterable[bytes]:
|
38 |
"""Read the response in chunks.
|
39 |
:param str url: The URL to perform the GET request for.
|
40 |
-
:param int chunk_size: The size in bytes of each chunk. Defaults to
|
41 |
-
:param int range_size: The size in bytes of each range request. Defaults to
|
42 |
:rtype: Iterable[bytes]
|
43 |
"""
|
44 |
-
file_size: int = filesize(url)
|
45 |
downloaded = 0
|
46 |
while downloaded < file_size:
|
47 |
stop_pos = min(downloaded + range_size, file_size) - 1
|
48 |
range_header = f"bytes={downloaded}-{stop_pos}"
|
49 |
response = _execute_request(url, method="GET", headers={"Range": range_header})
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
while True:
|
51 |
chunk = response.read(chunk_size)
|
52 |
if not chunk:
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
"""Implements a simple wrapper around urlopen."""
|
4 |
+
import logging
|
5 |
from functools import lru_cache
|
6 |
+
from http.client import HTTPResponse
|
7 |
+
from typing import Iterable, Dict, Optional
|
8 |
from urllib.request import Request
|
9 |
from urllib.request import urlopen
|
10 |
|
11 |
+
logger = logging.getLogger(__name__)
|
12 |
+
|
13 |
|
14 |
def _execute_request(
|
15 |
url: str, method: Optional[str] = None, headers: Optional[Dict[str, str]] = None
|
16 |
+
) -> HTTPResponse:
|
17 |
base_headers = {"User-Agent": "Mozilla/5.0"}
|
18 |
if headers:
|
19 |
base_headers.update(headers)
|
20 |
if url.lower().startswith("http"):
|
21 |
request = Request(url, headers=base_headers, method=method)
|
22 |
else:
|
23 |
+
raise ValueError("Invalid URL")
|
24 |
return urlopen(request) # nosec
|
25 |
|
26 |
|
|
|
37 |
|
38 |
|
39 |
def stream(
|
40 |
+
url: str, chunk_size: int = 4096, range_size: int = 9437184
|
41 |
) -> Iterable[bytes]:
|
42 |
"""Read the response in chunks.
|
43 |
:param str url: The URL to perform the GET request for.
|
44 |
+
:param int chunk_size: The size in bytes of each chunk. Defaults to 4KB
|
45 |
+
:param int range_size: The size in bytes of each range request. Defaults to 9MB
|
46 |
:rtype: Iterable[bytes]
|
47 |
"""
|
48 |
+
file_size: int = range_size # fake filesize to start
|
49 |
downloaded = 0
|
50 |
while downloaded < file_size:
|
51 |
stop_pos = min(downloaded + range_size, file_size) - 1
|
52 |
range_header = f"bytes={downloaded}-{stop_pos}"
|
53 |
response = _execute_request(url, method="GET", headers={"Range": range_header})
|
54 |
+
if file_size == range_size:
|
55 |
+
try:
|
56 |
+
content_range = response.info()["Content-Range"]
|
57 |
+
file_size = int(content_range.split("/")[1])
|
58 |
+
except (KeyError, IndexError, ValueError) as e:
|
59 |
+
logger.error(e)
|
60 |
while True:
|
61 |
chunk = response.read(chunk_size)
|
62 |
if not chunk:
|
pytube/streams.py
CHANGED
@@ -9,9 +9,11 @@ has been renamed to accommodate DASH (which serves the audio and video
|
|
9 |
separately).
|
10 |
"""
|
11 |
|
|
|
12 |
import logging
|
13 |
import os
|
14 |
from typing import Dict, Tuple, Optional, BinaryIO
|
|
|
15 |
|
16 |
from pytube import extract
|
17 |
from pytube import request
|
@@ -169,6 +171,11 @@ class Stream:
|
|
169 |
|
170 |
return self.filesize
|
171 |
|
|
|
|
|
|
|
|
|
|
|
172 |
@property
|
173 |
def default_filename(self) -> str:
|
174 |
"""Generate filename based on the video title.
|
|
|
9 |
separately).
|
10 |
"""
|
11 |
|
12 |
+
from datetime import datetime
|
13 |
import logging
|
14 |
import os
|
15 |
from typing import Dict, Tuple, Optional, BinaryIO
|
16 |
+
from urllib.parse import parse_qs
|
17 |
|
18 |
from pytube import extract
|
19 |
from pytube import request
|
|
|
171 |
|
172 |
return self.filesize
|
173 |
|
174 |
+
@property
|
175 |
+
def expiration(self) -> datetime:
|
176 |
+
expire = parse_qs(self.url.split("?")[1])["expire"][0]
|
177 |
+
return datetime.utcfromtimestamp(int(expire))
|
178 |
+
|
179 |
@property
|
180 |
def default_filename(self) -> str:
|
181 |
"""Generate filename based on the video title.
|
tests/test_request.py
CHANGED
@@ -7,9 +7,8 @@ import pytest
|
|
7 |
from pytube import request
|
8 |
|
9 |
|
10 |
-
@mock.patch("pytube.request.filesize", return_value=3 * 8 * 1024)
|
11 |
@mock.patch("pytube.request.urlopen")
|
12 |
-
def test_streaming(mock_urlopen, filesize):
|
13 |
# Given
|
14 |
fake_stream_binary = [
|
15 |
os.urandom(8 * 1024),
|
@@ -19,6 +18,7 @@ def test_streaming(mock_urlopen, filesize):
|
|
19 |
]
|
20 |
response = mock.Mock()
|
21 |
response.read.side_effect = fake_stream_binary
|
|
|
22 |
mock_urlopen.return_value = response
|
23 |
# When
|
24 |
response = request.stream("http://fakeassurl.gov")
|
|
|
7 |
from pytube import request
|
8 |
|
9 |
|
|
|
10 |
@mock.patch("pytube.request.urlopen")
|
11 |
+
def test_streaming(mock_urlopen):
|
12 |
# Given
|
13 |
fake_stream_binary = [
|
14 |
os.urandom(8 * 1024),
|
|
|
18 |
]
|
19 |
response = mock.Mock()
|
20 |
response.read.side_effect = fake_stream_binary
|
21 |
+
response.info.return_value = {"Content-Range": "bytes 200-1000/24576"}
|
22 |
mock_urlopen.return_value = response
|
23 |
# When
|
24 |
response = request.stream("http://fakeassurl.gov")
|
tests/test_streams.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import os
|
3 |
import random
|
|
|
4 |
from unittest import mock
|
5 |
from unittest.mock import MagicMock
|
6 |
|
@@ -59,6 +60,10 @@ def test_title(cipher_signature):
|
|
59 |
assert cipher_signature.title == expected
|
60 |
|
61 |
|
|
|
|
|
|
|
|
|
62 |
def test_caption_tracks(presigned_video):
|
63 |
assert len(presigned_video.caption_tracks) == 13
|
64 |
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import os
|
3 |
import random
|
4 |
+
from datetime import datetime
|
5 |
from unittest import mock
|
6 |
from unittest.mock import MagicMock
|
7 |
|
|
|
60 |
assert cipher_signature.title == expected
|
61 |
|
62 |
|
63 |
+
def test_expiration(cipher_signature):
|
64 |
+
assert cipher_signature.streams[0].expiration == datetime(2020, 1, 16, 5, 12, 5)
|
65 |
+
|
66 |
+
|
67 |
def test_caption_tracks(presigned_video):
|
68 |
assert len(presigned_video.caption_tracks) == 13
|
69 |
|