Merge branch 'master' of github.com:nficano/pytube
Browse files
* 'master' of github.com:nficano/pytube:
Add default for age_restricted in extract.get_ytplayer_config and extract.js_url
Added age_restricted arg to test_js_url
Flake8 E501 errors fix
Added support for age restricted videos without a signature in the url
Update README.md
Update README.md
Update README.md
Update README.md
Update README.md
Update README.md
Update README.md
Update README.md
Update README.md
- README.md +11 -18
- pytube/__main__.py +9 -4
- pytube/extract.py +12 -5
- pytube/mixins.py +7 -2
README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
|
2 |
<div align="center">
|
3 |
-
<
|
|
|
|
|
4 |
<p align="center">
|
5 |
<img src="https://img.shields.io/pypi/v/pytube.svg" alt="pypi">
|
6 |
<a href="https://travis-ci.org/nficano/pytube"><img src="https://travis-ci.org/nficano/pytube.svg?branch=master" /></a>
|
@@ -10,13 +12,10 @@
|
|
10 |
</p>
|
11 |
</div>
|
12 |
|
13 |
-
pytube
|
14 |
-
======
|
15 |
*pytube* is a lightweight, dependency-free Python library (and command-line utility) for downloading YouTube Videos.
|
16 |
|
17 |
-
Description
|
18 |
-
===========
|
19 |
-
|
20 |
YouTube is the most popular video-sharing platform in the world and as a hacker you may encounter a situation where you want to script something to download videos. For this I present to you *pytube*.
|
21 |
|
22 |
*pytube* is a lightweight library written in Python. It has no third party dependencies and aims to be highly reliable.
|
@@ -25,7 +24,7 @@ YouTube is the most popular video-sharing platform in the world and as a hacker
|
|
25 |
|
26 |
Finally *pytube* also includes a command-line utility, allowing you to quickly download videos right from the terminal.
|
27 |
|
28 |
-
|
29 |
|
30 |
```python
|
31 |
>>> YouTube('https://youtu.be/9bZkp7q19f0').streams.first().download()
|
@@ -38,9 +37,7 @@ Finally *pytube* also includes a command-line utility, allowing you to quickly d
|
|
38 |
... .download()
|
39 |
```
|
40 |
|
41 |
-
Features
|
42 |
-
--------
|
43 |
-
|
44 |
- Support for Both Progressive & DASH Streams
|
45 |
- Support for downloading complete playlist
|
46 |
- Easily Register ``on_download_progress`` & ``on_download_complete`` callbacks
|
@@ -51,17 +48,15 @@ Features
|
|
51 |
- Extensively Documented Source Code
|
52 |
- No Third-Party Dependencies
|
53 |
|
54 |
-
Installation
|
55 |
-
------------
|
56 |
|
57 |
Download using pip via pypi.
|
58 |
|
59 |
```bash
|
60 |
-
pip install pytube
|
61 |
```
|
62 |
|
63 |
-
Getting started
|
64 |
-
---------------
|
65 |
|
66 |
Let's begin with showing how easy it is to download a video with pytube:
|
67 |
|
@@ -218,9 +213,7 @@ Similarly, if your application requires on-download progress logic, pytube expos
|
|
218 |
>>> yt.register_on_progress_callback(show_progress_bar)
|
219 |
```
|
220 |
|
221 |
-
|
222 |
-
Command-line interface
|
223 |
-
======================
|
224 |
|
225 |
pytube also ships with a tiny cli interface for downloading and probing videos.
|
226 |
|
|
|
1 |
|
2 |
<div align="center">
|
3 |
+
<p>
|
4 |
+
<img src="./images/pytube.png" width="350" height="328" alt="pytube logo" />
|
5 |
+
</p>
|
6 |
<p align="center">
|
7 |
<img src="https://img.shields.io/pypi/v/pytube.svg" alt="pypi">
|
8 |
<a href="https://travis-ci.org/nficano/pytube"><img src="https://travis-ci.org/nficano/pytube.svg?branch=master" /></a>
|
|
|
12 |
</p>
|
13 |
</div>
|
14 |
|
15 |
+
# pytube
|
|
|
16 |
*pytube* is a lightweight, dependency-free Python library (and command-line utility) for downloading YouTube Videos.
|
17 |
|
18 |
+
## Description
|
|
|
|
|
19 |
YouTube is the most popular video-sharing platform in the world and as a hacker you may encounter a situation where you want to script something to download videos. For this I present to you *pytube*.
|
20 |
|
21 |
*pytube* is a lightweight library written in Python. It has no third party dependencies and aims to be highly reliable.
|
|
|
24 |
|
25 |
Finally *pytube* also includes a command-line utility, allowing you to quickly download videos right from the terminal.
|
26 |
|
27 |
+
### Behold, a perfect balance of simplicity versus flexibility:
|
28 |
|
29 |
```python
|
30 |
>>> YouTube('https://youtu.be/9bZkp7q19f0').streams.first().download()
|
|
|
37 |
... .download()
|
38 |
```
|
39 |
|
40 |
+
## Features
|
|
|
|
|
41 |
- Support for Both Progressive & DASH Streams
|
42 |
- Support for downloading complete playlist
|
43 |
- Easily Register ``on_download_progress`` & ``on_download_complete`` callbacks
|
|
|
48 |
- Extensively Documented Source Code
|
49 |
- No Third-Party Dependencies
|
50 |
|
51 |
+
## Installation
|
|
|
52 |
|
53 |
Download using pip via pypi.
|
54 |
|
55 |
```bash
|
56 |
+
$ pip install pytube
|
57 |
```
|
58 |
|
59 |
+
## Getting started
|
|
|
60 |
|
61 |
Let's begin with showing how easy it is to download a video with pytube:
|
62 |
|
|
|
213 |
>>> yt.register_on_progress_callback(show_progress_bar)
|
214 |
```
|
215 |
|
216 |
+
## Command-line interface
|
|
|
|
|
217 |
|
218 |
pytube also ships with a tiny cli interface for downloading and probing videos.
|
219 |
|
pytube/__main__.py
CHANGED
@@ -109,7 +109,7 @@ class YouTube(object):
|
|
109 |
self.player_config_args = self.vid_info
|
110 |
else:
|
111 |
self.player_config_args = extract.get_ytplayer_config(
|
112 |
-
self.watch_html
|
113 |
)['args']
|
114 |
|
115 |
# https://github.com/nficano/pytube/issues/165
|
@@ -123,8 +123,13 @@ class YouTube(object):
|
|
123 |
mixins.apply_descrambler(self.vid_info, fmt)
|
124 |
mixins.apply_descrambler(self.player_config_args, fmt)
|
125 |
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
# build instances of :class:`Stream <Stream>`
|
130 |
self.initialize_stream_objects(fmt)
|
@@ -157,7 +162,7 @@ class YouTube(object):
|
|
157 |
)
|
158 |
self.vid_info = request.get(self.vid_info_url)
|
159 |
if not self.age_restricted:
|
160 |
-
self.js_url = extract.js_url(self.watch_html)
|
161 |
self.js = request.get(self.js_url)
|
162 |
|
163 |
def initialize_stream_objects(self, fmt):
|
|
|
109 |
self.player_config_args = self.vid_info
|
110 |
else:
|
111 |
self.player_config_args = extract.get_ytplayer_config(
|
112 |
+
self.watch_html
|
113 |
)['args']
|
114 |
|
115 |
# https://github.com/nficano/pytube/issues/165
|
|
|
123 |
mixins.apply_descrambler(self.vid_info, fmt)
|
124 |
mixins.apply_descrambler(self.player_config_args, fmt)
|
125 |
|
126 |
+
try:
|
127 |
+
mixins.apply_signature(self.player_config_args, fmt, self.js)
|
128 |
+
except TypeError:
|
129 |
+
self.js_url = extract.js_url(
|
130 |
+
self.embed_html, self.age_restricted)
|
131 |
+
self.js = request.get(self.js_url)
|
132 |
+
mixins.apply_signature(self.player_config_args, fmt, self.js)
|
133 |
|
134 |
# build instances of :class:`Stream <Stream>`
|
135 |
self.initialize_stream_objects(fmt)
|
|
|
162 |
)
|
163 |
self.vid_info = request.get(self.vid_info_url)
|
164 |
if not self.age_restricted:
|
165 |
+
self.js_url = extract.js_url(self.watch_html, self.age_restricted)
|
166 |
self.js = request.get(self.js_url)
|
167 |
|
168 |
def initialize_stream_objects(self, fmt):
|
pytube/extract.py
CHANGED
@@ -111,7 +111,7 @@ def video_info_url(
|
|
111 |
return 'https://youtube.com/get_video_info?' + urlencode(params)
|
112 |
|
113 |
|
114 |
-
def js_url(
|
115 |
"""Get the base JavaScript url.
|
116 |
|
117 |
Construct the base JavaScript url, which contains the decipher
|
@@ -119,9 +119,11 @@ def js_url(watch_html):
|
|
119 |
|
120 |
:param str watch_html:
|
121 |
The html contents of the watch page.
|
|
|
|
|
122 |
|
123 |
"""
|
124 |
-
ytplayer_config = get_ytplayer_config(
|
125 |
base_js = ytplayer_config['assets']['js']
|
126 |
return 'https://youtube.com' + base_js
|
127 |
|
@@ -150,7 +152,7 @@ def mime_type_codec(mime_type_codec):
|
|
150 |
return mime_type, [c.strip() for c in codecs.split(',')]
|
151 |
|
152 |
|
153 |
-
def get_ytplayer_config(
|
154 |
"""Get the YouTube player configuration data from the watch html.
|
155 |
|
156 |
Extract the ``ytplayer_config``, which is json data embedded within the
|
@@ -159,10 +161,15 @@ def get_ytplayer_config(watch_html):
|
|
159 |
|
160 |
:param str watch_html:
|
161 |
The html contents of the watch page.
|
|
|
|
|
162 |
:rtype: str
|
163 |
:returns:
|
164 |
Substring of the html containing the encoded manifest data.
|
165 |
"""
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
168 |
return json.loads(yt_player_config)
|
|
|
111 |
return 'https://youtube.com/get_video_info?' + urlencode(params)
|
112 |
|
113 |
|
114 |
+
def js_url(html, age_restricted=False):
|
115 |
"""Get the base JavaScript url.
|
116 |
|
117 |
Construct the base JavaScript url, which contains the decipher
|
|
|
119 |
|
120 |
:param str watch_html:
|
121 |
The html contents of the watch page.
|
122 |
+
:param bool age_restricted:
|
123 |
+
Is video age restricted.
|
124 |
|
125 |
"""
|
126 |
+
ytplayer_config = get_ytplayer_config(html, age_restricted)
|
127 |
base_js = ytplayer_config['assets']['js']
|
128 |
return 'https://youtube.com' + base_js
|
129 |
|
|
|
152 |
return mime_type, [c.strip() for c in codecs.split(',')]
|
153 |
|
154 |
|
155 |
+
def get_ytplayer_config(html, age_restricted=False):
|
156 |
"""Get the YouTube player configuration data from the watch html.
|
157 |
|
158 |
Extract the ``ytplayer_config``, which is json data embedded within the
|
|
|
161 |
|
162 |
:param str watch_html:
|
163 |
The html contents of the watch page.
|
164 |
+
:param bool age_restricted:
|
165 |
+
Is video age restricted.
|
166 |
:rtype: str
|
167 |
:returns:
|
168 |
Substring of the html containing the encoded manifest data.
|
169 |
"""
|
170 |
+
if age_restricted:
|
171 |
+
pattern = r";yt\.setConfig\(\{'PLAYER_CONFIG':\s*({.*})(,'EXPERIMENT_FLAGS'|;)" # noqa: E501
|
172 |
+
else:
|
173 |
+
pattern = r';ytplayer\.config\s*=\s*({.*?});'
|
174 |
+
yt_player_config = regex_search(pattern, html, group=1)
|
175 |
return json.loads(yt_player_config)
|
pytube/mixins.py
CHANGED
@@ -33,11 +33,16 @@ def apply_signature(config_args, fmt, js):
|
|
33 |
if 'signature=' in url:
|
34 |
# For certain videos, YouTube will just provide them pre-signed, in
|
35 |
# which case there's no real magic to download them and we can skip
|
36 |
-
# the whole signature
|
37 |
logger.debug('signature found, skip decipher')
|
38 |
continue
|
39 |
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
logger.debug(
|
43 |
'finished descrambling signature for itag=%s\n%s',
|
|
|
33 |
if 'signature=' in url:
|
34 |
# For certain videos, YouTube will just provide them pre-signed, in
|
35 |
# which case there's no real magic to download them and we can skip
|
36 |
+
# the whole signature descrambling entirely.
|
37 |
logger.debug('signature found, skip decipher')
|
38 |
continue
|
39 |
|
40 |
+
if js is not None:
|
41 |
+
signature = cipher.get_signature(js, stream['s'])
|
42 |
+
else:
|
43 |
+
# signature not present in url (line 33), need js to descramble
|
44 |
+
# TypeError caught in __main__
|
45 |
+
raise TypeError('JS is None')
|
46 |
|
47 |
logger.debug(
|
48 |
'finished descrambling signature for itag=%s\n%s',
|