Dionyssos committed on
Commit
972caea
·
1 Parent(s): 2d0e2b6
Files changed (4) hide show
  1. README.md +15 -6
  2. api.py +1 -0
  3. landscape2soundscape.py +5 -90
  4. logo_raw_smb_aud.png +0 -0
README.md CHANGED
@@ -18,12 +18,14 @@ tags:
18
  ---
19
 
20
 
21
- # Affective TTS & Soundscape Synthesis
22
 
23
- Affective TTS tool for [SHIFT Horizon](https://shift-europe.eu/).
24
- - Synthesizes affective speech with sound scape, trees, water, leaves, background from plain text or subtitles (.srt) & overlays it to videos.
 
 
 
25
  - `134` built-in affective voices available, tuned for [StyleTTS2](https://github.com/yl4579/StyleTTS2).
26
- - [GitHub](https://github.com/audeering/shift)
27
 
28
  ### Available Voices
29
 
@@ -40,7 +42,7 @@ cd shift/
40
  pip install -r requirements.txt
41
  ```
42
 
43
- Start Flask
44
 
45
  ```
46
  CUDA_DEVICE_ORDER=PCI_BUS_ID HF_HOME=./hf_home CUDA_VISIBLE_DEVICES=2 python api.py
@@ -48,7 +50,7 @@ CUDA_DEVICE_ORDER=PCI_BUS_ID HF_HOME=./hf_home CUDA_VISIBLE_DEVICES=2 python api
48
 
49
  ## Inference
50
 
51
- The following need `api.py` to be running, e.g. `.. on computeXX`.
52
 
53
  **Text 2 Speech**
54
 
@@ -77,6 +79,13 @@ python tts.py --text assets/head_of_fortuna_en.srt --video assets/head_of_fortun
77
  python tts.py --text assets/head_of_fortuna_GPT.txt --video assets/head_of_fortuna.mp4
78
  ```
79
 
 
 
 
 
 
 
 
80
  ## Examples
81
 
82
  Substitute Native voice via TTS
 
18
  ---
19
 
20
 
21
+ # Affective TTS & Soundscapes
22
 
23
+ Synthesize affective TTS using the [SHIFT TTS tool](https://github.com/audeering/shift), as well as audio soundscapes.
24
+ - Affective TTS is based on this [phenomenon](https://huggingface.co/dkounadis/artificial-styletts2/discussions/2)
25
+ - Soundscapes, e.g. trees, water, leaves, are text-described generations from [AudioGen](https://huggingface.co/dkounadis/artificial-styletts2/discussions/3)
26
+ - `landscape2soundscape.py` shows an example of how to overlay TTS & soundscape on an .mp4
27
+ - Synthesizes speech from plain text or subtitles (.srt) & overlays it on videos.
28
  - `134` built-in affective voices available, tuned for [StyleTTS2](https://github.com/yl4579/StyleTTS2).
 
29
 
30
  ### Available Voices
31
 
 
42
  pip install -r requirements.txt
43
  ```
44
 
45
+ Flask
46
 
47
  ```
48
  CUDA_DEVICE_ORDER=PCI_BUS_ID HF_HOME=./hf_home CUDA_VISIBLE_DEVICES=2 python api.py
 
50
 
51
  ## Inference
52
 
53
+ The following need `api.py` to be running, e.g. in a `tmux` session.
54
 
55
  **Text 2 Speech**
56
 
 
79
  python tts.py --text assets/head_of_fortuna_GPT.txt --video assets/head_of_fortuna.mp4
80
  ```
81
 
82
+ **Landscape 2 Soundscape**
83
+
84
+ ```python
85
+ # TTS & soundscape - overlay to .mp4
86
+ python landscape2soundscape.py
87
+ ```
88
+
89
  ## Examples
90
 
91
  Substitute Native voice via TTS
api.py CHANGED
@@ -396,6 +396,7 @@ def serve_wav():
396
  print(f'\n=SERVER saved as {OUT_FILE=}\n')
397
  response = send_from_directory(CACHE_DIR, path=OUT_FILE)
398
  response.headers['suffix-file-type'] = OUT_FILE
 
399
  return response
400
 
401
 
 
396
  print(f'\n=SERVER saved as {OUT_FILE=}\n')
397
  response = send_from_directory(CACHE_DIR, path=OUT_FILE)
398
  response.headers['suffix-file-type'] = OUT_FILE
399
+ print('_________________________________________________________\n ? \n_______________')
400
  return response
401
 
402
 
landscape2soundscape.py CHANGED
@@ -3,28 +3,18 @@ import subprocess
3
  import cv2
4
 
5
  # with subprocess and an extra argument 'scene' and a 'resized image saved as png' we can call the server
6
-
7
  # yt-dlp is installed in .d4
8
  # Download Part of Video
9
  # yt-dlp https://www.youtube.com/watch?v=UZ9uyQI3pF0 --downloader ffmpeg --downloader-args "ffmpeg_i:-ss 997 -to 2512"
10
  # ffmpeg -i Sandra\ Kotevska\,\ Painting\ Rose\ bush\,\ mixed\ media\,\ 2017.\ \[NMzC_036MtE\].mkv -f mp3 -ar 22050 -vn out44.wav -ac 1
11
-
12
  # https://superuser.com/questions/583393/how-to-extract-subtitle-from-video-using-ffmpeg
13
-
14
- def _shift(x):
15
- n = x.shape[0]
16
- i = np.random.randint(.24 * n, .74 * n)
17
- return np.roll(x, i)
18
-
19
  #___________________________________________________________________________________________________
20
  # VIDEO FROM IMAGE with CAPTIONS
21
  #
22
  # UPLOAD to: Simaviro: Documents General WORK PACKAGES WP1 ContentRepository ANBPR_ROMANIA TTSvideos
23
  # __________________________________________________________________________________________________
24
-
25
  # TO DOWNLOAD SRT for youtube
26
  # yt-dlp --write-sub --sub-lang en --convert-subs "srt" https://www.youtube.com/watch?v=F1Ib7TAu7eg&list=PL4x2B6LSwFewdDvRnUTpBM7jkmpwouhPv&index=2
27
-
28
  # _voice = 'en_US/vctk_low#p330'
29
  # _voice = 'en_US/cmu-arctic_low#lnh' #en_US/vctk_low#p249' # 'en_US/vctk_low#p282'
30
  # _voice = ''en_US/vctk_low#p351''
@@ -93,7 +83,7 @@ DESCRIPTIONS = [
93
  ],
94
  # 6
95
  [
96
- '06_Menzel_AI900_001.jpg'
97
  '06_Menzel_AI900_001.txt',
98
  'Olive trees in Seville',
99
  'Adolph Menzel - Bauplatz mit Weiden - 1846',
@@ -181,92 +171,17 @@ for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:20]:
181
  offset_h = 24
182
  im[offset_h:h+offset_h, :w, :] = (.4 * im[offset_h:h+offset_h, :w, :] + .6 * fram).astype(np.uint8)
183
  # cv2.imshow('i', im); cv2.waitKey(); cv2.destroyAllWindows()
184
-
185
- # logo aud
186
-
187
- logo = cv2.imread('assets/audeering_logo.jpg')[:740, :, :]
188
- logo = cv2.resize(logo, (logo.shape[1]//2, logo.shape[0]//2))
189
- h, w, _ = logo.shape
190
- offset_h = im.shape[0] - h
191
- im[offset_h:h+offset_h, :w, :] = (.23 * im[offset_h:h+offset_h, :w, :] + .77 * logo).astype(np.uint8)
192
-
193
- # logo SMB
194
-
195
- logo = cv2.imread('assets/SMB_logo.png')#[:740, :, :]
196
- logo = cv2.resize(logo, (logo.shape[1]//2, logo.shape[0]//2))
197
- h, w, _ = logo.shape
198
- offset_h = im.shape[0] - h
199
- # fill logo SMB with the pixels of im - where SMB is empty
200
- ptc = im[offset_h:h+offset_h, :w, :]
201
- logo[logo == 0] = ptc[logo == 0] # fill empty
202
- im[offset_h:h+offset_h, :w, :] = (.13 * im[offset_h:h+offset_h, :w, :] + .86 * logo).astype(np.uint8)
203
-
204
- # # logo shift
205
-
206
- # logo = cv2.imread('assets/shift_logo.png')#[:740, :, :]
207
- # logo = cv2.resize(logo, (logo.shape[1]//2, logo.shape[0]//2))
208
- # h, w, _ = logo.shape
209
- # offset_h = im.shape[0] - h #-274
210
- # offset_w = im.shape[1] - w #400
211
- # # # fill logo SMB with the pixels of im - where SMB is empty
212
- # ptc = im[offset_h:h+offset_h, :w, :]
213
- # # msk = np.tile(logo[:, :,0:1] > 252, [1,1,3])
214
- # # logo[msk] = ptc[msk] # fill empty
215
- # im[offset_h:h+offset_h, offset_w:w+offset_w, :] = (.0 * im[offset_h:h+offset_h, offset_w:w+offset_w, :] + 1 * logo).astype(np.uint8)
216
-
217
- # silent video - img
218
- # im = cv2.resize(im, (700, 700))
219
- cv2.imwrite('pic_logo_emb.png', im)
220
-
221
-
222
-
223
-
224
- # raw, _ = soundfile.read(soundscape_file) # 12345, 2
225
-
226
- # # fill
227
- # soundscape = []
228
- # for _replica in range(math.ceil(len(total) / raw.shape[0])+1):
229
- # soundscape.append(raw) # _shift non defined for stereo
230
- # soundscape = np.concatenate(soundscape, 0)
231
-
232
- # total = .36 * np.concatenate([total[:, None],
233
- # total[:, None]], 1) + .64 * soundscape[:len(total), :]
234
-
235
- # outfile
236
-
237
  OUT_FILE = _img_.split('/')[-1].replace('.','__') + '.mp4' # assets / -1
238
  print(f'{OUT_FILE=}\n')
239
- # call API passing img
240
-
241
  subprocess.run(
242
  [
243
  "python",
244
  "tts.py",
245
  "--text", PIC_DIR + _text_,
246
- '--image', 'pic_logo_emb.png',
247
  # "--title", _title_,
248
  # '--soundscape_text', soundscape_text,
249
  '--voice', _voice_,
250
- '--out_file', OUT_FILE,
251
- ])
252
-
253
- # soundfile.write(AUDIO_TRACK, total, 22050)
254
- # subprocess.call(
255
- # ["ffmpeg",
256
- # "-y",
257
- # "-i",
258
- # SILENT_VIDEO,
259
- # "-i",
260
- # AUDIO_TRACK,
261
- # #"-c:v",
262
- # #"copy",
263
- # "-map",
264
- # "0:v:0",
265
- # "-map",
266
- # " 1:a:0",
267
- # "-vf",
268
- # "pad",
269
- # OUT_FILE])
270
-
271
-
272
-
 
3
  import cv2
4
 
5
  # with subprocess and an extra argument 'scene' and a 'resized image saved as png' we can call the server
 
6
  # yt-dlp is installed in .d4
7
  # Download Part of Video
8
  # yt-dlp https://www.youtube.com/watch?v=UZ9uyQI3pF0 --downloader ffmpeg --downloader-args "ffmpeg_i:-ss 997 -to 2512"
9
  # ffmpeg -i Sandra\ Kotevska\,\ Painting\ Rose\ bush\,\ mixed\ media\,\ 2017.\ \[NMzC_036MtE\].mkv -f mp3 -ar 22050 -vn out44.wav -ac 1
 
10
  # https://superuser.com/questions/583393/how-to-extract-subtitle-from-video-using-ffmpeg
 
 
 
 
 
 
11
  #___________________________________________________________________________________________________
12
  # VIDEO FROM IMAGE with CAPTIONS
13
  #
14
  # UPLOAD to: Simaviro: Documents General WORK PACKAGES WP1 ContentRepository ANBPR_ROMANIA TTSvideos
15
  # __________________________________________________________________________________________________
 
16
  # TO DOWNLOAD SRT for youtube
17
  # yt-dlp --write-sub --sub-lang en --convert-subs "srt" https://www.youtube.com/watch?v=F1Ib7TAu7eg&list=PL4x2B6LSwFewdDvRnUTpBM7jkmpwouhPv&index=2
 
18
  # _voice = 'en_US/vctk_low#p330'
19
  # _voice = 'en_US/cmu-arctic_low#lnh' #en_US/vctk_low#p249' # 'en_US/vctk_low#p282'
20
  # _voice = ''en_US/vctk_low#p351''
 
83
  ],
84
  # 6
85
  [
86
+ '06_Menzel_AI900_001.jpg',
87
  '06_Menzel_AI900_001.txt',
88
  'Olive trees in Seville',
89
  'Adolph Menzel - Bauplatz mit Weiden - 1846',
 
171
  offset_h = 24
172
  im[offset_h:h+offset_h, :w, :] = (.4 * im[offset_h:h+offset_h, :w, :] + .6 * fram).astype(np.uint8)
173
  # cv2.imshow('i', im); cv2.waitKey(); cv2.destroyAllWindows()
174
+ cv2.imwrite('_tmp_banner.png', im)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  OUT_FILE = _img_.split('/')[-1].replace('.','__') + '.mp4' # assets / -1
176
  print(f'{OUT_FILE=}\n')
 
 
177
  subprocess.run(
178
  [
179
  "python",
180
  "tts.py",
181
  "--text", PIC_DIR + _text_,
182
+ '--image', '_tmp_banner.png',
183
  # "--title", _title_,
184
  # '--soundscape_text', soundscape_text,
185
  '--voice', _voice_,
186
+ '--out_file', OUT_FILE, # save to correct location is handled in client
187
+ ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logo_raw_smb_aud.png ADDED