aadnk committed on
Commit
1a68fc3
·
1 Parent(s): 08d2e96

Wrap lines in SRT and VTT files that exceed 47 characters

Browse files
Files changed (1) hide show
  1. utils.py +11 -2
utils.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import unicodedata
2
  import re
3
 
@@ -55,9 +56,11 @@ def write_txt(transcript: Iterator[dict], file: TextIO):
55
  def write_vtt(transcript: Iterator[dict], file: TextIO):
56
  print("WEBVTT\n", file=file)
57
  for segment in transcript:
 
 
58
  print(
59
  f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
60
- f"{segment['text'].replace('-->', '->')}\n",
61
  file=file,
62
  flush=True,
63
  )
@@ -76,16 +79,22 @@ def write_srt(transcript: Iterator[dict], file: TextIO):
76
  write_srt(result["segments"], file=srt)
77
  """
78
  for i, segment in enumerate(transcript, start=1):
 
 
79
  # write srt lines
80
  print(
81
  f"{i}\n"
82
  f"{format_timestamp(segment['start'], always_include_hours=True, fractionalSeperator=',')} --> "
83
  f"{format_timestamp(segment['end'], always_include_hours=True, fractionalSeperator=',')}\n"
84
- f"{segment['text'].strip().replace('-->', '->')}\n",
85
  file=file,
86
  flush=True,
87
  )
88
 
 
 
 
 
89
  def slugify(value, allow_unicode=False):
90
  """
91
  Taken from https://github.com/django/django/blob/master/django/utils/text.py
 
1
+ import textwrap
2
  import unicodedata
3
  import re
4
 
 
56
  def write_vtt(transcript: Iterator[dict], file: TextIO):
57
  print("WEBVTT\n", file=file)
58
  for segment in transcript:
59
+ text = processText(segment['text']).replace('-->', '->')
60
+
61
  print(
62
  f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
63
+ f"{text}\n",
64
  file=file,
65
  flush=True,
66
  )
 
79
  write_srt(result["segments"], file=srt)
80
  """
81
  for i, segment in enumerate(transcript, start=1):
82
+ text = processText(segment['text'].strip()).replace('-->', '->')
83
+
84
  # write srt lines
85
  print(
86
  f"{i}\n"
87
  f"{format_timestamp(segment['start'], always_include_hours=True, fractionalSeperator=',')} --> "
88
  f"{format_timestamp(segment['end'], always_include_hours=True, fractionalSeperator=',')}\n"
89
+ f"{text}\n",
90
  file=file,
91
  flush=True,
92
  )
93
 
94
def processText(text: str, maxLineWidth: int = 47) -> str:
    """
    Wrap subtitle text so that no output line is longer than maxLineWidth.

    The text is re-flowed with textwrap.wrap: tabs are expanded using a tab
    size of 4, and the resulting lines are joined with a newline character.

    Parameters
    ----------
    text: str
        The subtitle text to wrap.
    maxLineWidth: int
        The maximum number of characters per line. Defaults to 47, the width
        used when writing SRT and VTT files.

    Returns
    -------
    The wrapped text, with lines separated by "\n". An empty input yields an
    empty string.

    Raises
    ------
    ValueError
        Propagated from textwrap when maxLineWidth is not positive.
    """
    wrapped_lines = textwrap.wrap(text, width=maxLineWidth, tabsize=4)
    return '\n'.join(wrapped_lines)
97
+
98
  def slugify(value, allow_unicode=False):
99
  """
100
  Taken from https://github.com/django/django/blob/master/django/utils/text.py