Taylor Fox Dahlin
committed on
Improved caption selection, and updated tests to reflect this. (#783)
Browse files
- pytube/captions.py +6 -1
- tests/test_captions.py +14 -10
- tests/test_cli.py +5 -5
pytube/captions.py
CHANGED
@@ -23,7 +23,12 @@ class Caption:
|
|
23 |
"""
|
24 |
self.url = caption_track.get("baseUrl")
|
25 |
self.name = caption_track["name"]["simpleText"]
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
@property
|
29 |
def xml_captions(self) -> str:
|
|
|
23 |
"""
|
24 |
self.url = caption_track.get("baseUrl")
|
25 |
self.name = caption_track["name"]["simpleText"]
|
26 |
+
# Use "vssId" instead of "languageCode", fix issue #779
|
27 |
+
self.code = caption_track["vssId"]
|
28 |
+
# Remove preceding '.' for backwards compatibility, e.g.:
|
29 |
+
# English -> vssId: .en, languageCode: en
|
30 |
+
# English (auto-generated) -> vssId: a.en, languageCode: en
|
31 |
+
self.code = self.code.strip('.')
|
32 |
|
33 |
@property
|
34 |
def xml_captions(self) -> str:
|
tests/test_captions.py
CHANGED
@@ -14,17 +14,17 @@ from pytube import captions
|
|
14 |
|
15 |
def test_float_to_srt_time_format():
|
16 |
caption1 = Caption(
|
17 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
18 |
)
|
19 |
assert caption1.float_to_srt_time_format(3.89) == "00:00:03,890"
|
20 |
|
21 |
|
22 |
def test_caption_query_sequence():
|
23 |
caption1 = Caption(
|
24 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
25 |
)
|
26 |
caption2 = Caption(
|
27 |
-
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
28 |
)
|
29 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
30 |
assert len(caption_query) == 2
|
@@ -36,10 +36,10 @@ def test_caption_query_sequence():
|
|
36 |
|
37 |
def test_caption_query_get_by_language_code_when_exists():
|
38 |
caption1 = Caption(
|
39 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
40 |
)
|
41 |
caption2 = Caption(
|
42 |
-
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
43 |
)
|
44 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
45 |
assert caption_query["en"] == caption1
|
@@ -47,10 +47,10 @@ def test_caption_query_get_by_language_code_when_exists():
|
|
47 |
|
48 |
def test_caption_query_get_by_language_code_when_not_exists():
|
49 |
caption1 = Caption(
|
50 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
51 |
)
|
52 |
caption2 = Caption(
|
53 |
-
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
54 |
)
|
55 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
56 |
with pytest.raises(KeyError):
|
@@ -68,6 +68,7 @@ def test_download(srt):
|
|
68 |
"url": "url1",
|
69 |
"name": {"simpleText": "name1"},
|
70 |
"languageCode": "en",
|
|
|
71 |
}
|
72 |
)
|
73 |
caption.download("title")
|
@@ -86,6 +87,7 @@ def test_download_with_prefix(srt):
|
|
86 |
"url": "url1",
|
87 |
"name": {"simpleText": "name1"},
|
88 |
"languageCode": "en",
|
|
|
89 |
}
|
90 |
)
|
91 |
caption.download("title", filename_prefix="1 ")
|
@@ -106,6 +108,7 @@ def test_download_with_output_path(srt):
|
|
106 |
"url": "url1",
|
107 |
"name": {"simpleText": "name1"},
|
108 |
"languageCode": "en",
|
|
|
109 |
}
|
110 |
)
|
111 |
file_path = caption.download("title", output_path="blah")
|
@@ -123,6 +126,7 @@ def test_download_xml_and_trim_extension(xml):
|
|
123 |
"url": "url1",
|
124 |
"name": {"simpleText": "name1"},
|
125 |
"languageCode": "en",
|
|
|
126 |
}
|
127 |
)
|
128 |
caption.download("title.xml", srt=False)
|
@@ -133,7 +137,7 @@ def test_download_xml_and_trim_extension(xml):
|
|
133 |
|
134 |
def test_repr():
|
135 |
caption = Caption(
|
136 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
137 |
)
|
138 |
assert str(caption) == '<Caption lang="name1" code="en">'
|
139 |
|
@@ -145,7 +149,7 @@ def test_repr():
|
|
145 |
def test_xml_captions(request_get):
|
146 |
request_get.return_value = "test"
|
147 |
caption = Caption(
|
148 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
149 |
)
|
150 |
assert caption.xml_captions == "test"
|
151 |
|
@@ -158,7 +162,7 @@ def test_generate_srt_captions(request):
|
|
158 |
"如要啓動字幕,請按一下這裡的圖示。</text></transcript>"
|
159 |
)
|
160 |
caption = Caption(
|
161 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
162 |
)
|
163 |
assert caption.generate_srt_captions() == (
|
164 |
"1\n"
|
|
|
14 |
|
15 |
def test_float_to_srt_time_format():
|
16 |
caption1 = Caption(
|
17 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
18 |
)
|
19 |
assert caption1.float_to_srt_time_format(3.89) == "00:00:03,890"
|
20 |
|
21 |
|
22 |
def test_caption_query_sequence():
|
23 |
caption1 = Caption(
|
24 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
25 |
)
|
26 |
caption2 = Caption(
|
27 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
|
28 |
)
|
29 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
30 |
assert len(caption_query) == 2
|
|
|
36 |
|
37 |
def test_caption_query_get_by_language_code_when_exists():
|
38 |
caption1 = Caption(
|
39 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
40 |
)
|
41 |
caption2 = Caption(
|
42 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
|
43 |
)
|
44 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
45 |
assert caption_query["en"] == caption1
|
|
|
47 |
|
48 |
def test_caption_query_get_by_language_code_when_not_exists():
|
49 |
caption1 = Caption(
|
50 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
51 |
)
|
52 |
caption2 = Caption(
|
53 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
|
54 |
)
|
55 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
56 |
with pytest.raises(KeyError):
|
|
|
68 |
"url": "url1",
|
69 |
"name": {"simpleText": "name1"},
|
70 |
"languageCode": "en",
|
71 |
+
"vssId": ".en"
|
72 |
}
|
73 |
)
|
74 |
caption.download("title")
|
|
|
87 |
"url": "url1",
|
88 |
"name": {"simpleText": "name1"},
|
89 |
"languageCode": "en",
|
90 |
+
"vssId": ".en"
|
91 |
}
|
92 |
)
|
93 |
caption.download("title", filename_prefix="1 ")
|
|
|
108 |
"url": "url1",
|
109 |
"name": {"simpleText": "name1"},
|
110 |
"languageCode": "en",
|
111 |
+
"vssId": ".en"
|
112 |
}
|
113 |
)
|
114 |
file_path = caption.download("title", output_path="blah")
|
|
|
126 |
"url": "url1",
|
127 |
"name": {"simpleText": "name1"},
|
128 |
"languageCode": "en",
|
129 |
+
"vssId": ".en"
|
130 |
}
|
131 |
)
|
132 |
caption.download("title.xml", srt=False)
|
|
|
137 |
|
138 |
def test_repr():
|
139 |
caption = Caption(
|
140 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
141 |
)
|
142 |
assert str(caption) == '<Caption lang="name1" code="en">'
|
143 |
|
|
|
149 |
def test_xml_captions(request_get):
|
150 |
request_get.return_value = "test"
|
151 |
caption = Caption(
|
152 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
153 |
)
|
154 |
assert caption.xml_captions == "test"
|
155 |
|
|
|
162 |
"如要啓動字幕,請按一下這裡的圖示。</text></transcript>"
|
163 |
)
|
164 |
caption = Caption(
|
165 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
166 |
)
|
167 |
assert caption.generate_srt_captions() == (
|
168 |
"1\n"
|
tests/test_cli.py
CHANGED
@@ -71,7 +71,7 @@ def test_display_stream(youtube, stream):
|
|
71 |
def test_download_caption_with_none(youtube, print_available):
|
72 |
# Given
|
73 |
caption = Caption(
|
74 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
75 |
)
|
76 |
youtube.captions = CaptionQuery([caption])
|
77 |
# When
|
@@ -84,7 +84,7 @@ def test_download_caption_with_none(youtube, print_available):
|
|
84 |
def test_download_caption_with_language_found(youtube):
|
85 |
youtube.title = "video title"
|
86 |
caption = Caption(
|
87 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
88 |
)
|
89 |
caption.download = MagicMock(return_value="file_path")
|
90 |
youtube.captions = CaptionQuery([caption])
|
@@ -97,7 +97,7 @@ def test_download_caption_with_language_found(youtube):
|
|
97 |
def test_download_caption_with_lang_not_found(youtube, print_available):
|
98 |
# Given
|
99 |
caption = Caption(
|
100 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
101 |
)
|
102 |
youtube.captions = CaptionQuery([caption])
|
103 |
# When
|
@@ -109,10 +109,10 @@ def test_download_caption_with_lang_not_found(youtube, print_available):
|
|
109 |
def test_print_available_captions(capsys):
|
110 |
# Given
|
111 |
caption1 = Caption(
|
112 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
113 |
)
|
114 |
caption2 = Caption(
|
115 |
-
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
116 |
)
|
117 |
query = CaptionQuery([caption1, caption2])
|
118 |
# When
|
|
|
71 |
def test_download_caption_with_none(youtube, print_available):
|
72 |
# Given
|
73 |
caption = Caption(
|
74 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
75 |
)
|
76 |
youtube.captions = CaptionQuery([caption])
|
77 |
# When
|
|
|
84 |
def test_download_caption_with_language_found(youtube):
|
85 |
youtube.title = "video title"
|
86 |
caption = Caption(
|
87 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
88 |
)
|
89 |
caption.download = MagicMock(return_value="file_path")
|
90 |
youtube.captions = CaptionQuery([caption])
|
|
|
97 |
def test_download_caption_with_lang_not_found(youtube, print_available):
|
98 |
# Given
|
99 |
caption = Caption(
|
100 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
101 |
)
|
102 |
youtube.captions = CaptionQuery([caption])
|
103 |
# When
|
|
|
109 |
def test_print_available_captions(capsys):
|
110 |
# Given
|
111 |
caption1 = Caption(
|
112 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
113 |
)
|
114 |
caption2 = Caption(
|
115 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
|
116 |
)
|
117 |
query = CaptionQuery([caption1, caption2])
|
118 |
# When
|