Merge pull request #61 from hbmartin/range-stream
Browse files
- .idea/dictionaries/haroldmartin.xml +1 -0
- README.md +1 -1
- pytube/cli.py +2 -3
- pytube/helpers.py +1 -5
- pytube/monostate.py +1 -12
- pytube/request.py +29 -8
- pytube/streams.py +10 -23
- pytube/version.py +1 -1
- tests/test_cli.py +2 -3
- tests/test_request.py +8 -5
- tests/test_streams.py +23 -4
.idea/dictionaries/haroldmartin.xml
CHANGED
@@ -28,6 +28,7 @@
|
|
28 |
<w>nosec</w>
|
29 |
<w>ntfs</w>
|
30 |
<w>prog</w>
|
|
|
31 |
<w>pytube</w>
|
32 |
<w>recompiles</w>
|
33 |
<w>samp</w>
|
|
|
28 |
<w>nosec</w>
|
29 |
<w>ntfs</w>
|
30 |
<w>prog</w>
|
31 |
+
<w>pylint</w>
|
32 |
<w>pytube</w>
|
33 |
<w>recompiles</w>
|
34 |
<w>samp</w>
|
README.md
CHANGED
@@ -36,7 +36,7 @@ $ pip install pytube3 --upgrade
|
|
36 |
## Quick start
|
37 |
```python
|
38 |
>>> from pytube import YouTube
|
39 |
-
>>> YouTube('https://youtu.be/9bZkp7q19f0').streams
|
40 |
>>>
|
41 |
>>> yt = YouTube('http://youtube.com/watch?v=9bZkp7q19f0')
|
42 |
>>> yt.streams
|
|
|
36 |
## Quick start
|
37 |
```python
|
38 |
>>> from pytube import YouTube
|
39 |
+
>>> YouTube('https://youtu.be/9bZkp7q19f0').streams.get_highest_resolution().download()
|
40 |
>>>
|
41 |
>>> yt = YouTube('http://youtube.com/watch?v=9bZkp7q19f0')
|
42 |
>>> yt.streams
|
pytube/cli.py
CHANGED
@@ -11,7 +11,6 @@ import os
|
|
11 |
import shutil
|
12 |
import sys
|
13 |
import subprocess # nosec
|
14 |
-
from io import BufferedWriter
|
15 |
from typing import Any, Optional, List
|
16 |
|
17 |
from pytube import __version__, CaptionQuery, Stream, Playlist
|
@@ -211,8 +210,8 @@ def display_progress_bar(
|
|
211 |
|
212 |
# noinspection PyUnusedLocal
|
213 |
def on_progress(
|
214 |
-
stream: Any, chunk:
|
215 |
-
) -> None:
|
216 |
filesize = stream.filesize
|
217 |
bytes_received = filesize - bytes_remaining
|
218 |
display_progress_bar(bytes_received, filesize)
|
|
|
11 |
import shutil
|
12 |
import sys
|
13 |
import subprocess # nosec
|
|
|
14 |
from typing import Any, Optional, List
|
15 |
|
16 |
from pytube import __version__, CaptionQuery, Stream, Playlist
|
|
|
210 |
|
211 |
# noinspection PyUnusedLocal
|
212 |
def on_progress(
|
213 |
+
stream: Any, chunk: bytes, bytes_remaining: int
|
214 |
+
) -> None: # pylint: disable=W0613
|
215 |
filesize = stream.filesize
|
216 |
bytes_received = filesize - bytes_remaining
|
217 |
display_progress_bar(bytes_received, filesize)
|
pytube/helpers.py
CHANGED
@@ -4,7 +4,6 @@
|
|
4 |
import functools
|
5 |
import logging
|
6 |
import os
|
7 |
-
import pprint
|
8 |
import re
|
9 |
import warnings
|
10 |
from typing import TypeVar, Callable, Optional, Dict, List, Any
|
@@ -34,10 +33,7 @@ def regex_search(pattern: str, string: str, group: int) -> str:
|
|
34 |
if not results:
|
35 |
raise RegexMatchError(caller="regex_search", pattern=pattern)
|
36 |
|
37 |
-
logger.debug(
|
38 |
-
"finished regex search: %s",
|
39 |
-
pprint.pformat({"pattern": pattern, "results": results.group(0),}, indent=2,),
|
40 |
-
)
|
41 |
|
42 |
return results.group(group)
|
43 |
|
|
|
4 |
import functools
|
5 |
import logging
|
6 |
import os
|
|
|
7 |
import re
|
8 |
import warnings
|
9 |
from typing import TypeVar, Callable, Optional, Dict, List, Any
|
|
|
33 |
if not results:
|
34 |
raise RegexMatchError(caller="regex_search", pattern=pattern)
|
35 |
|
36 |
+
logger.debug("matched regex search: %s", pattern)
|
|
|
|
|
|
|
37 |
|
38 |
return results.group(group)
|
39 |
|
pytube/monostate.py
CHANGED
@@ -1,18 +1,11 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
-
import io
|
4 |
from typing import Any, Optional
|
5 |
from typing_extensions import Protocol
|
6 |
|
7 |
|
8 |
class OnProgress(Protocol):
|
9 |
-
def __call__(
|
10 |
-
self,
|
11 |
-
stream: bytes,
|
12 |
-
chunk: Any,
|
13 |
-
file_handler: io.BufferedWriter,
|
14 |
-
bytes_remaining: int,
|
15 |
-
) -> None:
|
16 |
"""On download progress callback function.
|
17 |
|
18 |
:param stream:
|
@@ -21,10 +14,6 @@ class OnProgress(Protocol):
|
|
21 |
:py:class:`pytube.Stream`
|
22 |
:param str chunk:
|
23 |
Segment of media file binary data, not yet written to disk.
|
24 |
-
:param file_handler:
|
25 |
-
The file handle where the media is being written to.
|
26 |
-
:type file_handler:
|
27 |
-
:py:class:`io.BufferedWriter`
|
28 |
:param int bytes_remaining:
|
29 |
How many bytes have been downloaded.
|
30 |
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
|
|
3 |
from typing import Any, Optional
|
4 |
from typing_extensions import Protocol
|
5 |
|
6 |
|
7 |
class OnProgress(Protocol):
|
8 |
+
def __call__(self, stream: Any, chunk: bytes, bytes_remaining: int) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
"""On download progress callback function.
|
10 |
|
11 |
:param stream:
|
|
|
14 |
:py:class:`pytube.Stream`
|
15 |
:param str chunk:
|
16 |
Segment of media file binary data, not yet written to disk.
|
|
|
|
|
|
|
|
|
17 |
:param int bytes_remaining:
|
18 |
How many bytes have been downloaded.
|
19 |
|
pytube/request.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
"""Implements a simple wrapper around urlopen."""
|
|
|
4 |
from typing import Any, Iterable, Dict, Optional
|
5 |
from urllib.request import Request
|
6 |
from urllib.request import urlopen
|
@@ -31,18 +32,38 @@ def get(url) -> str:
|
|
31 |
return _execute_request(url).read().decode("utf-8")
|
32 |
|
33 |
|
34 |
-
def stream(
|
|
|
|
|
35 |
"""Read the response in chunks.
|
36 |
:param str url: The URL to perform the GET request for.
|
37 |
-
:param int chunk_size: The size in bytes of each chunk. Defaults to
|
|
|
38 |
:rtype: Iterable[bytes]
|
39 |
"""
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
|
48 |
def head(url: str) -> Dict:
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
"""Implements a simple wrapper around urlopen."""
|
4 |
+
from functools import lru_cache
|
5 |
from typing import Any, Iterable, Dict, Optional
|
6 |
from urllib.request import Request
|
7 |
from urllib.request import urlopen
|
|
|
32 |
return _execute_request(url).read().decode("utf-8")
|
33 |
|
34 |
|
35 |
+
def stream(
|
36 |
+
url: str, chunk_size: int = 1024, range_size: int = 10485760
|
37 |
+
) -> Iterable[bytes]:
|
38 |
"""Read the response in chunks.
|
39 |
:param str url: The URL to perform the GET request for.
|
40 |
+
:param int chunk_size: The size in bytes of each chunk. Defaults to 1KB
|
41 |
+
:param int range_size: The size in bytes of each range request. Defaults to 10MB
|
42 |
:rtype: Iterable[bytes]
|
43 |
"""
|
44 |
+
file_size: int = filesize(url)
|
45 |
+
downloaded = 0
|
46 |
+
while downloaded < file_size:
|
47 |
+
stop_pos = min(downloaded + range_size, file_size) - 1
|
48 |
+
range_header = f"bytes={downloaded}-{stop_pos}"
|
49 |
+
response = _execute_request(url, method="GET", headers={"Range": range_header})
|
50 |
+
while True:
|
51 |
+
chunk = response.read(chunk_size)
|
52 |
+
if not chunk:
|
53 |
+
break
|
54 |
+
downloaded += len(chunk)
|
55 |
+
yield chunk
|
56 |
+
return # pylint: disable=R1711
|
57 |
+
|
58 |
+
|
59 |
+
@lru_cache(maxsize=None)
|
60 |
+
def filesize(url: str) -> int:
|
61 |
+
"""Fetch size in bytes of file at given URL
|
62 |
+
|
63 |
+
:param str url: The URL to get the size of
|
64 |
+
:returns: int: size in bytes of remote file
|
65 |
+
"""
|
66 |
+
return int(head(url)["content-length"])
|
67 |
|
68 |
|
69 |
def head(url: str) -> Dict:
|
pytube/streams.py
CHANGED
@@ -9,11 +9,9 @@ has been renamed to accommodate DASH (which serves the audio and video
|
|
9 |
separately).
|
10 |
"""
|
11 |
|
12 |
-
import io
|
13 |
import logging
|
14 |
import os
|
15 |
-
import
|
16 |
-
from typing import Dict, Tuple, Optional
|
17 |
|
18 |
from pytube import extract
|
19 |
from pytube import request
|
@@ -143,8 +141,7 @@ class Stream:
|
|
143 |
Filesize (in bytes) of the stream.
|
144 |
"""
|
145 |
if self._filesize is None:
|
146 |
-
|
147 |
-
self._filesize = int(headers["content-length"])
|
148 |
return self._filesize
|
149 |
|
150 |
@property
|
@@ -255,15 +252,14 @@ class Stream:
|
|
255 |
def exists_at_path(self, file_path: str) -> bool:
|
256 |
return os.path.isfile(file_path) and os.path.getsize(file_path) == self.filesize
|
257 |
|
258 |
-
def stream_to_buffer(self) ->
|
259 |
"""Write the media stream to buffer
|
260 |
|
261 |
:rtype: io.BytesIO buffer
|
262 |
"""
|
263 |
-
buffer = io.BytesIO()
|
264 |
bytes_remaining = self.filesize
|
265 |
-
logger.
|
266 |
-
"downloading (%s total bytes) file to
|
267 |
)
|
268 |
|
269 |
for chunk in request.stream(self.url):
|
@@ -272,9 +268,8 @@ class Stream:
|
|
272 |
# send to the on_progress callback.
|
273 |
self.on_progress(chunk, buffer, bytes_remaining)
|
274 |
self.on_complete(None)
|
275 |
-
return buffer
|
276 |
|
277 |
-
def on_progress(self, chunk, file_handler, bytes_remaining):
|
278 |
"""On progress callback function.
|
279 |
|
280 |
This function writes the binary data to the file, then checks if an
|
@@ -295,24 +290,16 @@ class Stream:
|
|
295 |
|
296 |
"""
|
297 |
file_handler.write(chunk)
|
298 |
-
logger.debug(
|
299 |
-
|
300 |
-
|
301 |
-
{"chunk_size": len(chunk), "bytes_remaining": bytes_remaining,},
|
302 |
-
indent=2,
|
303 |
-
),
|
304 |
-
)
|
305 |
-
on_progress = self._monostate.on_progress
|
306 |
-
if on_progress:
|
307 |
-
logger.debug("calling on_progress callback %s", on_progress)
|
308 |
-
on_progress(self, chunk, file_handler, bytes_remaining)
|
309 |
|
310 |
def on_complete(self, file_path: Optional[str]):
|
311 |
"""On download complete handler function.
|
312 |
|
313 |
:param file_path:
|
314 |
The file handle where the media is being written to.
|
315 |
-
:type
|
316 |
|
317 |
:rtype: None
|
318 |
|
|
|
9 |
separately).
|
10 |
"""
|
11 |
|
|
|
12 |
import logging
|
13 |
import os
|
14 |
+
from typing import Dict, Tuple, Optional, BinaryIO
|
|
|
15 |
|
16 |
from pytube import extract
|
17 |
from pytube import request
|
|
|
141 |
Filesize (in bytes) of the stream.
|
142 |
"""
|
143 |
if self._filesize is None:
|
144 |
+
self._filesize = request.filesize(self.url)
|
|
|
145 |
return self._filesize
|
146 |
|
147 |
@property
|
|
|
252 |
def exists_at_path(self, file_path: str) -> bool:
|
253 |
return os.path.isfile(file_path) and os.path.getsize(file_path) == self.filesize
|
254 |
|
255 |
+
def stream_to_buffer(self, buffer: BinaryIO) -> None:
|
256 |
"""Write the media stream to buffer
|
257 |
|
258 |
:rtype: io.BytesIO buffer
|
259 |
"""
|
|
|
260 |
bytes_remaining = self.filesize
|
261 |
+
logger.info(
|
262 |
+
"downloading (%s total bytes) file to buffer", self.filesize,
|
263 |
)
|
264 |
|
265 |
for chunk in request.stream(self.url):
|
|
|
268 |
# send to the on_progress callback.
|
269 |
self.on_progress(chunk, buffer, bytes_remaining)
|
270 |
self.on_complete(None)
|
|
|
271 |
|
272 |
+
def on_progress(self, chunk: bytes, file_handler: BinaryIO, bytes_remaining: int):
|
273 |
"""On progress callback function.
|
274 |
|
275 |
This function writes the binary data to the file, then checks if an
|
|
|
290 |
|
291 |
"""
|
292 |
file_handler.write(chunk)
|
293 |
+
logger.debug("download remaining: %s", bytes_remaining)
|
294 |
+
if self._monostate.on_progress:
|
295 |
+
self._monostate.on_progress(self, chunk, bytes_remaining)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
|
297 |
def on_complete(self, file_path: Optional[str]):
|
298 |
"""On download complete handler function.
|
299 |
|
300 |
:param file_path:
|
301 |
The file handle where the media is being written to.
|
302 |
+
:type file_path: str
|
303 |
|
304 |
:rtype: None
|
305 |
|
pytube/version.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
-
__version__ = "9.6.
|
4 |
|
5 |
if __name__ == "__main__":
|
6 |
print(__version__)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
+
__version__ = "9.6.3"
|
4 |
|
5 |
if __name__ == "__main__":
|
6 |
print(__version__)
|
tests/test_cli.py
CHANGED
@@ -125,11 +125,10 @@ def test_display_progress_bar(capsys):
|
|
125 |
|
126 |
|
127 |
@mock.patch("pytube.Stream")
|
128 |
-
|
129 |
-
def test_on_progress(stream, writer):
|
130 |
stream.filesize = 10
|
131 |
cli.display_progress_bar = MagicMock()
|
132 |
-
cli.on_progress(stream, "",
|
133 |
cli.display_progress_bar.assert_called_once_with(3, 10)
|
134 |
|
135 |
|
|
|
125 |
|
126 |
|
127 |
@mock.patch("pytube.Stream")
|
128 |
+
def test_on_progress(stream):
|
|
|
129 |
stream.filesize = 10
|
130 |
cli.display_progress_bar = MagicMock()
|
131 |
+
cli.on_progress(stream, "", 7)
|
132 |
cli.display_progress_bar.assert_called_once_with(3, 10)
|
133 |
|
134 |
|
tests/test_request.py
CHANGED
@@ -7,20 +7,23 @@ import pytest
|
|
7 |
from pytube import request
|
8 |
|
9 |
|
|
|
10 |
@mock.patch("pytube.request.urlopen")
|
11 |
-
def test_streaming(mock_urlopen):
|
|
|
12 |
fake_stream_binary = [
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
None,
|
17 |
]
|
18 |
response = mock.Mock()
|
19 |
response.read.side_effect = fake_stream_binary
|
20 |
mock_urlopen.return_value = response
|
|
|
21 |
response = request.stream("http://fakeassurl.gov")
|
|
|
22 |
call_count = len(list(response))
|
23 |
-
|
24 |
assert call_count == 3
|
25 |
|
26 |
|
|
|
7 |
from pytube import request
|
8 |
|
9 |
|
10 |
+
@mock.patch("pytube.request.filesize", return_value=3 * 8 * 1024)
|
11 |
@mock.patch("pytube.request.urlopen")
|
12 |
+
def test_streaming(mock_urlopen, filesize):
|
13 |
+
# Given
|
14 |
fake_stream_binary = [
|
15 |
+
os.urandom(8 * 1024),
|
16 |
+
os.urandom(8 * 1024),
|
17 |
+
os.urandom(8 * 1024),
|
18 |
None,
|
19 |
]
|
20 |
response = mock.Mock()
|
21 |
response.read.side_effect = fake_stream_binary
|
22 |
mock_urlopen.return_value = response
|
23 |
+
# When
|
24 |
response = request.stream("http://fakeassurl.gov")
|
25 |
+
# Then
|
26 |
call_count = len(list(response))
|
|
|
27 |
assert call_count == 3
|
28 |
|
29 |
|
tests/test_streams.py
CHANGED
@@ -8,6 +8,24 @@ from pytube import request
|
|
8 |
from pytube import Stream, streams
|
9 |
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def test_filesize(cipher_signature, mocker):
|
12 |
mocker.patch.object(request, "head")
|
13 |
request.head.return_value = {"content-length": "6796391"}
|
@@ -16,11 +34,12 @@ def test_filesize(cipher_signature, mocker):
|
|
16 |
|
17 |
def test_filesize_approx(cipher_signature, mocker):
|
18 |
mocker.patch.object(request, "head")
|
19 |
-
request.head.return_value = {"content-length": "
|
20 |
stream = cipher_signature.streams[0]
|
|
|
21 |
assert stream.filesize_approx == 22350604
|
22 |
stream.bitrate = None
|
23 |
-
assert stream.filesize_approx ==
|
24 |
|
25 |
|
26 |
def test_default_filename(cipher_signature):
|
@@ -188,8 +207,8 @@ def test_on_progress_hook(cipher_signature, mocker):
|
|
188 |
stream.download()
|
189 |
assert callback_fn.called
|
190 |
args, _ = callback_fn.call_args
|
191 |
-
assert len(args) ==
|
192 |
-
stream, _, _
|
193 |
assert isinstance(stream, Stream)
|
194 |
|
195 |
|
|
|
8 |
from pytube import Stream, streams
|
9 |
|
10 |
|
11 |
+
@mock.patch("pytube.streams.request")
|
12 |
+
def test_stream_to_buffer(mock_request, cipher_signature):
|
13 |
+
# Given
|
14 |
+
stream_bytes = iter(
|
15 |
+
[
|
16 |
+
bytes(os.urandom(8 * 1024)),
|
17 |
+
bytes(os.urandom(8 * 1024)),
|
18 |
+
bytes(os.urandom(8 * 1024)),
|
19 |
+
]
|
20 |
+
)
|
21 |
+
mock_request.stream.return_value = stream_bytes
|
22 |
+
buffer = MagicMock()
|
23 |
+
# When
|
24 |
+
cipher_signature.streams[0].stream_to_buffer(buffer)
|
25 |
+
# Then
|
26 |
+
assert buffer.write.call_count == 3
|
27 |
+
|
28 |
+
|
29 |
def test_filesize(cipher_signature, mocker):
|
30 |
mocker.patch.object(request, "head")
|
31 |
request.head.return_value = {"content-length": "6796391"}
|
|
|
34 |
|
35 |
def test_filesize_approx(cipher_signature, mocker):
|
36 |
mocker.patch.object(request, "head")
|
37 |
+
request.head.return_value = {"content-length": "6796391"}
|
38 |
stream = cipher_signature.streams[0]
|
39 |
+
|
40 |
assert stream.filesize_approx == 22350604
|
41 |
stream.bitrate = None
|
42 |
+
assert stream.filesize_approx == 6796391
|
43 |
|
44 |
|
45 |
def test_default_filename(cipher_signature):
|
|
|
207 |
stream.download()
|
208 |
assert callback_fn.called
|
209 |
args, _ = callback_fn.call_args
|
210 |
+
assert len(args) == 3
|
211 |
+
stream, _, _ = args
|
212 |
assert isinstance(stream, Stream)
|
213 |
|
214 |
|