Taylor Fox Dahlin committed on
Commit
7ad4dc3
·
unverified ·
1 Parent(s): 9e778d2

Improved caption selection, and updated tests to reflect this. (#783)

Browse files
Files changed (3) hide show
  1. pytube/captions.py +6 -1
  2. tests/test_captions.py +14 -10
  3. tests/test_cli.py +5 -5
pytube/captions.py CHANGED
@@ -23,7 +23,12 @@ class Caption:
23
  """
24
  self.url = caption_track.get("baseUrl")
25
  self.name = caption_track["name"]["simpleText"]
26
- self.code = caption_track["languageCode"]
 
 
 
 
 
27
 
28
  @property
29
  def xml_captions(self) -> str:
 
23
  """
24
  self.url = caption_track.get("baseUrl")
25
  self.name = caption_track["name"]["simpleText"]
26
+ # Use "vssId" instead of "languageCode", fix issue #779
27
+ self.code = caption_track["vssId"]
28
+ # Remove preceding '.' for backwards compatibility, e.g.:
29
+ # English -> vssId: .en, languageCode: en
30
+ # English (auto-generated) -> vssId: a.en, languageCode: en
31
+ self.code = self.code.strip('.')
32
 
33
  @property
34
  def xml_captions(self) -> str:
tests/test_captions.py CHANGED
@@ -14,17 +14,17 @@ from pytube import captions
14
 
15
  def test_float_to_srt_time_format():
16
  caption1 = Caption(
17
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
18
  )
19
  assert caption1.float_to_srt_time_format(3.89) == "00:00:03,890"
20
 
21
 
22
  def test_caption_query_sequence():
23
  caption1 = Caption(
24
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
25
  )
26
  caption2 = Caption(
27
- {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
28
  )
29
  caption_query = CaptionQuery(captions=[caption1, caption2])
30
  assert len(caption_query) == 2
@@ -36,10 +36,10 @@ def test_caption_query_sequence():
36
 
37
  def test_caption_query_get_by_language_code_when_exists():
38
  caption1 = Caption(
39
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
40
  )
41
  caption2 = Caption(
42
- {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
43
  )
44
  caption_query = CaptionQuery(captions=[caption1, caption2])
45
  assert caption_query["en"] == caption1
@@ -47,10 +47,10 @@ def test_caption_query_get_by_language_code_when_exists():
47
 
48
  def test_caption_query_get_by_language_code_when_not_exists():
49
  caption1 = Caption(
50
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
51
  )
52
  caption2 = Caption(
53
- {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
54
  )
55
  caption_query = CaptionQuery(captions=[caption1, caption2])
56
  with pytest.raises(KeyError):
@@ -68,6 +68,7 @@ def test_download(srt):
68
  "url": "url1",
69
  "name": {"simpleText": "name1"},
70
  "languageCode": "en",
 
71
  }
72
  )
73
  caption.download("title")
@@ -86,6 +87,7 @@ def test_download_with_prefix(srt):
86
  "url": "url1",
87
  "name": {"simpleText": "name1"},
88
  "languageCode": "en",
 
89
  }
90
  )
91
  caption.download("title", filename_prefix="1 ")
@@ -106,6 +108,7 @@ def test_download_with_output_path(srt):
106
  "url": "url1",
107
  "name": {"simpleText": "name1"},
108
  "languageCode": "en",
 
109
  }
110
  )
111
  file_path = caption.download("title", output_path="blah")
@@ -123,6 +126,7 @@ def test_download_xml_and_trim_extension(xml):
123
  "url": "url1",
124
  "name": {"simpleText": "name1"},
125
  "languageCode": "en",
 
126
  }
127
  )
128
  caption.download("title.xml", srt=False)
@@ -133,7 +137,7 @@ def test_download_xml_and_trim_extension(xml):
133
 
134
  def test_repr():
135
  caption = Caption(
136
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
137
  )
138
  assert str(caption) == '<Caption lang="name1" code="en">'
139
 
@@ -145,7 +149,7 @@ def test_repr():
145
  def test_xml_captions(request_get):
146
  request_get.return_value = "test"
147
  caption = Caption(
148
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
149
  )
150
  assert caption.xml_captions == "test"
151
 
@@ -158,7 +162,7 @@ def test_generate_srt_captions(request):
158
  "如要啓動字幕,請按一下這裡的圖示。</text></transcript>"
159
  )
160
  caption = Caption(
161
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
162
  )
163
  assert caption.generate_srt_captions() == (
164
  "1\n"
 
14
 
15
  def test_float_to_srt_time_format():
16
  caption1 = Caption(
17
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
18
  )
19
  assert caption1.float_to_srt_time_format(3.89) == "00:00:03,890"
20
 
21
 
22
  def test_caption_query_sequence():
23
  caption1 = Caption(
24
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
25
  )
26
  caption2 = Caption(
27
+ {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
28
  )
29
  caption_query = CaptionQuery(captions=[caption1, caption2])
30
  assert len(caption_query) == 2
 
36
 
37
  def test_caption_query_get_by_language_code_when_exists():
38
  caption1 = Caption(
39
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
40
  )
41
  caption2 = Caption(
42
+ {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
43
  )
44
  caption_query = CaptionQuery(captions=[caption1, caption2])
45
  assert caption_query["en"] == caption1
 
47
 
48
  def test_caption_query_get_by_language_code_when_not_exists():
49
  caption1 = Caption(
50
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
51
  )
52
  caption2 = Caption(
53
+ {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
54
  )
55
  caption_query = CaptionQuery(captions=[caption1, caption2])
56
  with pytest.raises(KeyError):
 
68
  "url": "url1",
69
  "name": {"simpleText": "name1"},
70
  "languageCode": "en",
71
+ "vssId": ".en"
72
  }
73
  )
74
  caption.download("title")
 
87
  "url": "url1",
88
  "name": {"simpleText": "name1"},
89
  "languageCode": "en",
90
+ "vssId": ".en"
91
  }
92
  )
93
  caption.download("title", filename_prefix="1 ")
 
108
  "url": "url1",
109
  "name": {"simpleText": "name1"},
110
  "languageCode": "en",
111
+ "vssId": ".en"
112
  }
113
  )
114
  file_path = caption.download("title", output_path="blah")
 
126
  "url": "url1",
127
  "name": {"simpleText": "name1"},
128
  "languageCode": "en",
129
+ "vssId": ".en"
130
  }
131
  )
132
  caption.download("title.xml", srt=False)
 
137
 
138
  def test_repr():
139
  caption = Caption(
140
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
141
  )
142
  assert str(caption) == '<Caption lang="name1" code="en">'
143
 
 
149
  def test_xml_captions(request_get):
150
  request_get.return_value = "test"
151
  caption = Caption(
152
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
153
  )
154
  assert caption.xml_captions == "test"
155
 
 
162
  "如要啓動字幕,請按一下這裡的圖示。</text></transcript>"
163
  )
164
  caption = Caption(
165
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
166
  )
167
  assert caption.generate_srt_captions() == (
168
  "1\n"
tests/test_cli.py CHANGED
@@ -71,7 +71,7 @@ def test_display_stream(youtube, stream):
71
  def test_download_caption_with_none(youtube, print_available):
72
  # Given
73
  caption = Caption(
74
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
75
  )
76
  youtube.captions = CaptionQuery([caption])
77
  # When
@@ -84,7 +84,7 @@ def test_download_caption_with_none(youtube, print_available):
84
  def test_download_caption_with_language_found(youtube):
85
  youtube.title = "video title"
86
  caption = Caption(
87
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
88
  )
89
  caption.download = MagicMock(return_value="file_path")
90
  youtube.captions = CaptionQuery([caption])
@@ -97,7 +97,7 @@ def test_download_caption_with_language_found(youtube):
97
  def test_download_caption_with_lang_not_found(youtube, print_available):
98
  # Given
99
  caption = Caption(
100
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
101
  )
102
  youtube.captions = CaptionQuery([caption])
103
  # When
@@ -109,10 +109,10 @@ def test_download_caption_with_lang_not_found(youtube, print_available):
109
  def test_print_available_captions(capsys):
110
  # Given
111
  caption1 = Caption(
112
- {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
113
  )
114
  caption2 = Caption(
115
- {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
116
  )
117
  query = CaptionQuery([caption1, caption2])
118
  # When
 
71
  def test_download_caption_with_none(youtube, print_available):
72
  # Given
73
  caption = Caption(
74
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
75
  )
76
  youtube.captions = CaptionQuery([caption])
77
  # When
 
84
  def test_download_caption_with_language_found(youtube):
85
  youtube.title = "video title"
86
  caption = Caption(
87
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
88
  )
89
  caption.download = MagicMock(return_value="file_path")
90
  youtube.captions = CaptionQuery([caption])
 
97
  def test_download_caption_with_lang_not_found(youtube, print_available):
98
  # Given
99
  caption = Caption(
100
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
101
  )
102
  youtube.captions = CaptionQuery([caption])
103
  # When
 
109
  def test_print_available_captions(capsys):
110
  # Given
111
  caption1 = Caption(
112
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
113
  )
114
  caption2 = Caption(
115
+ {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
116
  )
117
  query = CaptionQuery([caption1, caption2])
118
  # When