Dionyssos committed on
Commit
3844e6c
·
1 Parent(s): 9cbdf67

per-sentence scripts / for cer

Browse files
Files changed (2) hide show
  1. correct_figure.py +1 -1
  2. visualize_per_sentence.py +244 -0
correct_figure.py CHANGED
@@ -299,7 +299,7 @@ for audio_prompt in ['english',
299
  'foreign',
300
  'foreign_4x']: # each of these creates a separate pkl - so outer for
301
  #
302
- data = np.zeros((767, len(LABELS)*2 + 2)) # 720 x LABELS-prompt & LABELS-stts2 & cer-prompt & cer-stts2
303
 
304
 
305
 
 
299
  'foreign',
300
  'foreign_4x']: # each of these creates a separate pkl - so outer for
301
  #
302
+ data = np.zeros((770, len(LABELS)*2 + 2)) # 768 x LABELS-prompt & LABELS-stts2 & cer-prompt & cer-stts2
303
 
304
 
305
 
visualize_per_sentence.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Plot per-sentence emotion predictions of StyleTTS2 outputs as PDF figures.

PREREQUISITE
    correct_figure.py -> makes the *_analytic.pkl files & CER -> per sentence,
    no audinterface sliding window.

For each language group ('english', 'foreign') one figure is written to
``persentence_{lang}.pdf``.  Each figure has one row per emotion
dimension / category and two columns:

* left  column: ``{lang}_analytic.pkl``
* right column: ``{lang}_4x_analytic.pkl``

In every panel the line is the ``styletts2-<dim>`` column of the language
pickle and the shaded area is the ``styletts2-<dim>`` column of
``human_analytic.pkl``.
"""
import os
from pathlib import Path

import audiofile
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

LABELS = [
    'arousal', 'dominance', 'valence',
    'Angry',
    'Sad',
    'Happy',
    'Surprise',
    'Fear',
    'Disgust',
    'Contempt',
    'Neutral',
]

# Column names: every label once for the prompt and once for the StyleTTS2
# output, followed by the two CER columns.  Derived from LABELS so the two
# lists cannot drift apart.
columns = (
    [f'{source}-{label}'
     for source in ('prompt', 'styletts2')
     for label in LABELS]
    + ['cer-prompt', 'cer-styletts2']
)

FULL_PKL = [
    'english_4x_analytic.pkl',
    'english_analytic.pkl',
    'foreign_4x_analytic.pkl',
    'foreign_analytic.pkl',
    'human_analytic.pkl',
]

# Rows of the figure: first the dimensional predictions, then the categories
# that are actually plotted (Surprise / Contempt / Neutral are left out).
DIMENSIONS = ['arousal', 'dominance', 'valence']
CATEGORIES = ['Angry', 'Sad', 'Happy', 'Fear', 'Disgust']

LINE_COLOR = (0, 104 / 255, 139 / 255)
FILL_COLOR = (.2, .2, .2)
REFERENCE_LEGEND = 'StyleTTS2 using EmoDB'
X_LABEL = '720 Harvard Sentences'

# https://arxiv.org/pdf/2407.12229
# https://arxiv.org/pdf/2312.05187
# https://arxiv.org/abs/2407.05407
# https://arxiv.org/pdf/2408.06577
# https://arxiv.org/pdf/2309.07405

# NOTE: pd.read_pickle executes arbitrary code while unpickling; only load
# pickles produced locally (see PREREQUISITE above), never untrusted files.
preds = {pkl_name: pd.read_pickle(pkl_name) for pkl_name in FULL_PKL}


def _draw_panel(axis, synthetic, reference, *,
                ylabel=None, xlabel_kwargs=None, legend_labels=None):
    """Draw one panel: `synthetic` as a line over the shaded `reference`.

    Args:
        axis: matplotlib Axes to draw on.
        synthetic: per-sentence values drawn as a line.
        reference: per-sentence values drawn as an area filled from zero.
        ylabel: text for the y axis, or None for no y label.
        xlabel_kwargs: keyword arguments for ``set_xlabel`` (may be empty),
            or None for no x label.
        legend_labels: two legend entries (line, shaded area), or None for
            no legend.
    """
    # Each series gets an x axis of its own length, so a pickle whose length
    # differs from the others is still drawn instead of raising on a shape
    # mismatch.
    axis.plot(np.arange(len(synthetic)), synthetic,
              color=LINE_COLOR, linewidth=2)
    axis.fill_between(np.arange(len(reference)), 0, reference,
                      color=FILL_COLOR, alpha=0.244)

    axis.set_ylim([1e-7, .9999])
    axis.set_xlim([0, len(synthetic) - 1])
    # Hide the x tick labels of this panel (ticks and grid lines stay).
    axis.tick_params(labelbottom=False)
    axis.grid()

    if ylabel is not None:
        axis.set_ylabel(ylabel, color=(.4, .4, .4), fontsize=17)
    if xlabel_kwargs is not None:
        axis.set_xlabel(X_LABEL, **xlabel_kwargs)
    if legend_labels is not None:
        # Labels are matched in order to the line and the filled area.
        axis.legend(legend_labels, prop={'size': 14})


for lang in ['english', 'foreign']:

    desc = 'English' if lang == 'english' else 'Non-English'
    human = preds['human_analytic.pkl']

    # (pickle to plot, legend entry of its line) for the left / right column.
    panels = [
        (f'{lang}_analytic.pkl', f'StyleTTS2 using Mimic-3 {desc}'),
        (f'{lang}_4x_analytic.pkl', f'StyleTTS2 using Mimic-3 {desc} 4x speed'),
    ]

    fig, ax = plt.subplots(nrows=len(DIMENSIONS) + len(CATEGORIES),
                           ncols=2,
                           figsize=(24, 20.7),
                           gridspec_kw={'hspace': 0, 'wspace': .04})

    for row, dim in enumerate(DIMENSIONS + CATEGORIES):
        is_category = row >= len(DIMENSIONS)
        column_name = f'styletts2-{dim}'

        for col, (pkl_name, line_legend) in enumerate(panels):
            # NOTE(review): the x label is set on every category row and on
            # the right column of the dimension rows (there with default
            # styling), exactly as before; with hspace=0 only the bottom row
            # is likely to be readable -- confirm this is intended.
            if is_category:
                xlabel_kwargs = {'fontsize': 17, 'color': (.2, .2, .2)}
            elif col == 1:
                xlabel_kwargs = {}
            else:
                xlabel_kwargs = None

            _draw_panel(
                ax[row, col],
                preds[pkl_name][column_name],
                human[column_name],
                # y label only on the left column, legend only on the top row
                ylabel=dim.lower() if col == 0 else None,
                xlabel_kwargs=xlabel_kwargs,
                legend_labels=([line_legend, REFERENCE_LEGEND]
                               if row == 0 else None),
            )

    plt.savefig(f'persentence_{lang}.pdf', bbox_inches='tight')
    plt.close(fig)