multi range file streaming
Browse files- README.md +1 -1
- pytube/cli.py +1 -4
- pytube/helpers.py +1 -5
- pytube/monostate.py +1 -12
- pytube/request.py +24 -8
- pytube/streams.py +10 -23
- tests/test_cli.py +2 -3
- tests/test_request.py +5 -4
- tests/test_streams.py +4 -5
README.md
CHANGED
@@ -36,7 +36,7 @@ $ pip install pytube3 --upgrade
|
|
36 |
## Quick start
|
37 |
```python
|
38 |
>>> from pytube import YouTube
|
39 |
-
>>> YouTube('https://youtu.be/9bZkp7q19f0').streams
|
40 |
>>>
|
41 |
>>> yt = YouTube('http://youtube.com/watch?v=9bZkp7q19f0')
|
42 |
>>> yt.streams
|
|
|
36 |
## Quick start
|
37 |
```python
|
38 |
>>> from pytube import YouTube
|
39 |
+
>>> YouTube('https://youtu.be/9bZkp7q19f0').streams.get_highest_resolution().download()
|
40 |
>>>
|
41 |
>>> yt = YouTube('http://youtube.com/watch?v=9bZkp7q19f0')
|
42 |
>>> yt.streams
|
pytube/cli.py
CHANGED
@@ -11,7 +11,6 @@ import os
|
|
11 |
import shutil
|
12 |
import sys
|
13 |
import subprocess # nosec
|
14 |
-
from io import BufferedWriter
|
15 |
from typing import Any, Optional, List
|
16 |
|
17 |
from pytube import __version__, CaptionQuery, Stream, Playlist
|
@@ -210,9 +209,7 @@ def display_progress_bar(
|
|
210 |
|
211 |
|
212 |
# noinspection PyUnusedLocal
|
213 |
-
def on_progress(
|
214 |
-
stream: Any, chunk: Any, file_handler: BufferedWriter, bytes_remaining: int
|
215 |
-
) -> None:
|
216 |
filesize = stream.filesize
|
217 |
bytes_received = filesize - bytes_remaining
|
218 |
display_progress_bar(bytes_received, filesize)
|
|
|
11 |
import shutil
|
12 |
import sys
|
13 |
import subprocess # nosec
|
|
|
14 |
from typing import Any, Optional, List
|
15 |
|
16 |
from pytube import __version__, CaptionQuery, Stream, Playlist
|
|
|
209 |
|
210 |
|
211 |
# noinspection PyUnusedLocal
|
212 |
+
def on_progress(stream: Any, chunk: bytes, bytes_remaining: int) -> None:
|
|
|
|
|
213 |
filesize = stream.filesize
|
214 |
bytes_received = filesize - bytes_remaining
|
215 |
display_progress_bar(bytes_received, filesize)
|
pytube/helpers.py
CHANGED
@@ -4,7 +4,6 @@
|
|
4 |
import functools
|
5 |
import logging
|
6 |
import os
|
7 |
-
import pprint
|
8 |
import re
|
9 |
import warnings
|
10 |
from typing import TypeVar, Callable, Optional, Dict, List, Any
|
@@ -34,10 +33,7 @@ def regex_search(pattern: str, string: str, group: int) -> str:
|
|
34 |
if not results:
|
35 |
raise RegexMatchError(caller="regex_search", pattern=pattern)
|
36 |
|
37 |
-
logger.debug(
|
38 |
-
"finished regex search: %s",
|
39 |
-
pprint.pformat({"pattern": pattern, "results": results.group(0),}, indent=2,),
|
40 |
-
)
|
41 |
|
42 |
return results.group(group)
|
43 |
|
|
|
4 |
import functools
|
5 |
import logging
|
6 |
import os
|
|
|
7 |
import re
|
8 |
import warnings
|
9 |
from typing import TypeVar, Callable, Optional, Dict, List, Any
|
|
|
33 |
if not results:
|
34 |
raise RegexMatchError(caller="regex_search", pattern=pattern)
|
35 |
|
36 |
+
logger.debug("matched regex search: %s", pattern)
|
|
|
|
|
|
|
37 |
|
38 |
return results.group(group)
|
39 |
|
pytube/monostate.py
CHANGED
@@ -1,18 +1,11 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
-
import io
|
4 |
from typing import Any, Optional
|
5 |
from typing_extensions import Protocol
|
6 |
|
7 |
|
8 |
class OnProgress(Protocol):
|
9 |
-
def __call__(
|
10 |
-
self,
|
11 |
-
stream: bytes,
|
12 |
-
chunk: Any,
|
13 |
-
file_handler: io.BufferedWriter,
|
14 |
-
bytes_remaining: int,
|
15 |
-
) -> None:
|
16 |
"""On download progress callback function.
|
17 |
|
18 |
:param stream:
|
@@ -21,10 +14,6 @@ class OnProgress(Protocol):
|
|
21 |
:py:class:`pytube.Stream`
|
22 |
:param str chunk:
|
23 |
Segment of media file binary data, not yet written to disk.
|
24 |
-
:param file_handler:
|
25 |
-
The file handle where the media is being written to.
|
26 |
-
:type file_handler:
|
27 |
-
:py:class:`io.BufferedWriter`
|
28 |
:param int bytes_remaining:
|
29 |
How many bytes have been downloaded.
|
30 |
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
|
|
3 |
from typing import Any, Optional
|
4 |
from typing_extensions import Protocol
|
5 |
|
6 |
|
7 |
class OnProgress(Protocol):
|
8 |
+
def __call__(self, stream: Any, chunk: bytes, bytes_remaining: int) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
"""On download progress callback function.
|
10 |
|
11 |
:param stream:
|
|
|
14 |
:py:class:`pytube.Stream`
|
15 |
:param str chunk:
|
16 |
Segment of media file binary data, not yet written to disk.
|
|
|
|
|
|
|
|
|
17 |
:param int bytes_remaining:
|
18 |
How many bytes have been downloaded.
|
19 |
|
pytube/request.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
"""Implements a simple wrapper around urlopen."""
|
|
|
4 |
from typing import Any, Iterable, Dict, Optional
|
5 |
from urllib.request import Request
|
6 |
from urllib.request import urlopen
|
@@ -31,18 +32,33 @@ def get(url) -> str:
|
|
31 |
return _execute_request(url).read().decode("utf-8")
|
32 |
|
33 |
|
34 |
-
def stream(
|
|
|
|
|
35 |
"""Read the response in chunks.
|
36 |
:param str url: The URL to perform the GET request for.
|
37 |
-
:param int chunk_size: The size in bytes of each chunk. Defaults to
|
|
|
38 |
:rtype: Iterable[bytes]
|
39 |
"""
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
|
48 |
def head(url: str) -> Dict:
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
|
3 |
"""Implements a simple wrapper around urlopen."""
|
4 |
+
from functools import lru_cache
|
5 |
from typing import Any, Iterable, Dict, Optional
|
6 |
from urllib.request import Request
|
7 |
from urllib.request import urlopen
|
|
|
32 |
return _execute_request(url).read().decode("utf-8")
|
33 |
|
34 |
|
35 |
+
def stream(
|
36 |
+
url: str, chunk_size: int = 1024, range_size: int = 10485760
|
37 |
+
) -> Iterable[bytes]:
|
38 |
"""Read the response in chunks.
|
39 |
:param str url: The URL to perform the GET request for.
|
40 |
+
:param int chunk_size: The size in bytes of each chunk. Defaults to 1KB
|
41 |
+
:param int range_size: The size in bytes of each range request. Defaults to 10MB
|
42 |
:rtype: Iterable[bytes]
|
43 |
"""
|
44 |
+
file_size: int = filesize(url)
|
45 |
+
downloaded = 0
|
46 |
+
while downloaded < file_size:
|
47 |
+
stop_pos = min(downloaded + range_size, file_size) - 1
|
48 |
+
range_header = f"bytes={downloaded}-{stop_pos}"
|
49 |
+
r = _execute_request(url, method="GET", headers={"Range": range_header})
|
50 |
+
while True:
|
51 |
+
chunk = r.read(chunk_size)
|
52 |
+
if not chunk:
|
53 |
+
break
|
54 |
+
downloaded += len(chunk)
|
55 |
+
yield chunk
|
56 |
+
return
|
57 |
+
|
58 |
+
|
59 |
+
@lru_cache(maxsize=None)
|
60 |
+
def filesize(url: str) -> int:
|
61 |
+
return int(head(url)["content-length"])
|
62 |
|
63 |
|
64 |
def head(url: str) -> Dict:
|
pytube/streams.py
CHANGED
@@ -9,11 +9,9 @@ has been renamed to accommodate DASH (which serves the audio and video
|
|
9 |
separately).
|
10 |
"""
|
11 |
|
12 |
-
import io
|
13 |
import logging
|
14 |
import os
|
15 |
-
import
|
16 |
-
from typing import Dict, Tuple, Optional
|
17 |
|
18 |
from pytube import extract
|
19 |
from pytube import request
|
@@ -143,8 +141,7 @@ class Stream:
|
|
143 |
Filesize (in bytes) of the stream.
|
144 |
"""
|
145 |
if self._filesize is None:
|
146 |
-
|
147 |
-
self._filesize = int(headers["content-length"])
|
148 |
return self._filesize
|
149 |
|
150 |
@property
|
@@ -255,15 +252,14 @@ class Stream:
|
|
255 |
def exists_at_path(self, file_path: str) -> bool:
|
256 |
return os.path.isfile(file_path) and os.path.getsize(file_path) == self.filesize
|
257 |
|
258 |
-
def stream_to_buffer(self) ->
|
259 |
"""Write the media stream to buffer
|
260 |
|
261 |
:rtype: io.BytesIO buffer
|
262 |
"""
|
263 |
-
buffer = io.BytesIO()
|
264 |
bytes_remaining = self.filesize
|
265 |
-
logger.
|
266 |
-
"downloading (%s total bytes) file to
|
267 |
)
|
268 |
|
269 |
for chunk in request.stream(self.url):
|
@@ -272,9 +268,8 @@ class Stream:
|
|
272 |
# send to the on_progress callback.
|
273 |
self.on_progress(chunk, buffer, bytes_remaining)
|
274 |
self.on_complete(None)
|
275 |
-
return buffer
|
276 |
|
277 |
-
def on_progress(self, chunk, file_handler, bytes_remaining):
|
278 |
"""On progress callback function.
|
279 |
|
280 |
This function writes the binary data to the file, then checks if an
|
@@ -295,24 +290,16 @@ class Stream:
|
|
295 |
|
296 |
"""
|
297 |
file_handler.write(chunk)
|
298 |
-
logger.debug(
|
299 |
-
|
300 |
-
|
301 |
-
{"chunk_size": len(chunk), "bytes_remaining": bytes_remaining,},
|
302 |
-
indent=2,
|
303 |
-
),
|
304 |
-
)
|
305 |
-
on_progress = self._monostate.on_progress
|
306 |
-
if on_progress:
|
307 |
-
logger.debug("calling on_progress callback %s", on_progress)
|
308 |
-
on_progress(self, chunk, file_handler, bytes_remaining)
|
309 |
|
310 |
def on_complete(self, file_path: Optional[str]):
|
311 |
"""On download complete handler function.
|
312 |
|
313 |
:param file_path:
|
314 |
The file handle where the media is being written to.
|
315 |
-
:type
|
316 |
|
317 |
:rtype: None
|
318 |
|
|
|
9 |
separately).
|
10 |
"""
|
11 |
|
|
|
12 |
import logging
|
13 |
import os
|
14 |
+
from typing import Dict, Tuple, Optional, BinaryIO
|
|
|
15 |
|
16 |
from pytube import extract
|
17 |
from pytube import request
|
|
|
141 |
Filesize (in bytes) of the stream.
|
142 |
"""
|
143 |
if self._filesize is None:
|
144 |
+
self._filesize = request.filesize(self.url)
|
|
|
145 |
return self._filesize
|
146 |
|
147 |
@property
|
|
|
252 |
def exists_at_path(self, file_path: str) -> bool:
|
253 |
return os.path.isfile(file_path) and os.path.getsize(file_path) == self.filesize
|
254 |
|
255 |
+
def stream_to_buffer(self, buffer: BinaryIO) -> None:
|
256 |
"""Write the media stream to buffer
|
257 |
|
258 |
:rtype: io.BytesIO buffer
|
259 |
"""
|
|
|
260 |
bytes_remaining = self.filesize
|
261 |
+
logger.info(
|
262 |
+
"downloading (%s total bytes) file to buffer", self.filesize,
|
263 |
)
|
264 |
|
265 |
for chunk in request.stream(self.url):
|
|
|
268 |
# send to the on_progress callback.
|
269 |
self.on_progress(chunk, buffer, bytes_remaining)
|
270 |
self.on_complete(None)
|
|
|
271 |
|
272 |
+
def on_progress(self, chunk: bytes, file_handler: BinaryIO, bytes_remaining: int):
|
273 |
"""On progress callback function.
|
274 |
|
275 |
This function writes the binary data to the file, then checks if an
|
|
|
290 |
|
291 |
"""
|
292 |
file_handler.write(chunk)
|
293 |
+
logger.debug("download remaining: %s", bytes_remaining)
|
294 |
+
if self._monostate.on_progress:
|
295 |
+
self._monostate.on_progress(self, chunk, bytes_remaining)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
|
297 |
def on_complete(self, file_path: Optional[str]):
|
298 |
"""On download complete handler function.
|
299 |
|
300 |
:param file_path:
|
301 |
The file handle where the media is being written to.
|
302 |
+
:type file_path: str
|
303 |
|
304 |
:rtype: None
|
305 |
|
tests/test_cli.py
CHANGED
@@ -125,11 +125,10 @@ def test_display_progress_bar(capsys):
|
|
125 |
|
126 |
|
127 |
@mock.patch("pytube.Stream")
|
128 |
-
|
129 |
-
def test_on_progress(stream, writer):
|
130 |
stream.filesize = 10
|
131 |
cli.display_progress_bar = MagicMock()
|
132 |
-
cli.on_progress(stream, "",
|
133 |
cli.display_progress_bar.assert_called_once_with(3, 10)
|
134 |
|
135 |
|
|
|
125 |
|
126 |
|
127 |
@mock.patch("pytube.Stream")
|
128 |
+
def test_on_progress(stream):
|
|
|
129 |
stream.filesize = 10
|
130 |
cli.display_progress_bar = MagicMock()
|
131 |
+
cli.on_progress(stream, "", 7)
|
132 |
cli.display_progress_bar.assert_called_once_with(3, 10)
|
133 |
|
134 |
|
tests/test_request.py
CHANGED
@@ -7,12 +7,13 @@ import pytest
|
|
7 |
from pytube import request
|
8 |
|
9 |
|
|
|
10 |
@mock.patch("pytube.request.urlopen")
|
11 |
-
def test_streaming(mock_urlopen):
|
12 |
fake_stream_binary = [
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
None,
|
17 |
]
|
18 |
response = mock.Mock()
|
|
|
7 |
from pytube import request
|
8 |
|
9 |
|
10 |
+
@mock.patch("pytube.request.filesize", return_value=3 * 8 * 1024)
|
11 |
@mock.patch("pytube.request.urlopen")
|
12 |
+
def test_streaming(mock_urlopen, filesize):
|
13 |
fake_stream_binary = [
|
14 |
+
os.urandom(8 * 1024),
|
15 |
+
os.urandom(8 * 1024),
|
16 |
+
os.urandom(8 * 1024),
|
17 |
None,
|
18 |
]
|
19 |
response = mock.Mock()
|
tests/test_streams.py
CHANGED
@@ -15,12 +15,11 @@ def test_filesize(cipher_signature, mocker):
|
|
15 |
|
16 |
|
17 |
def test_filesize_approx(cipher_signature, mocker):
|
18 |
-
mocker.patch.object(request, "head")
|
19 |
-
request.head.return_value = {"content-length": "123"}
|
20 |
stream = cipher_signature.streams[0]
|
|
|
21 |
assert stream.filesize_approx == 22350604
|
22 |
stream.bitrate = None
|
23 |
-
assert stream.filesize_approx ==
|
24 |
|
25 |
|
26 |
def test_default_filename(cipher_signature):
|
@@ -188,8 +187,8 @@ def test_on_progress_hook(cipher_signature, mocker):
|
|
188 |
stream.download()
|
189 |
assert callback_fn.called
|
190 |
args, _ = callback_fn.call_args
|
191 |
-
assert len(args) ==
|
192 |
-
stream, _, _
|
193 |
assert isinstance(stream, Stream)
|
194 |
|
195 |
|
|
|
15 |
|
16 |
|
17 |
def test_filesize_approx(cipher_signature, mocker):
|
|
|
|
|
18 |
stream = cipher_signature.streams[0]
|
19 |
+
|
20 |
assert stream.filesize_approx == 22350604
|
21 |
stream.bitrate = None
|
22 |
+
assert stream.filesize_approx == 6796391
|
23 |
|
24 |
|
25 |
def test_default_filename(cipher_signature):
|
|
|
187 |
stream.download()
|
188 |
assert callback_fn.called
|
189 |
args, _ = callback_fn.call_args
|
190 |
+
assert len(args) == 3
|
191 |
+
stream, _, _ = args
|
192 |
assert isinstance(stream, Stream)
|
193 |
|
194 |
|