Spaces:
Running
Running
Update index.html
Browse files- index.html +822 -523
index.html
CHANGED
@@ -1,585 +1,884 @@
|
|
1 |
<!doctype html>
|
2 |
<html lang="en-us">
|
3 |
-
|
4 |
-
<head>
|
5 |
<title>whisper.cpp : WASM example</title>
|
6 |
|
7 |
<style>
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
white-space: pre;
|
28 |
-
overflow-wrap: normal;
|
29 |
-
overflow-x: scroll;
|
30 |
-
}
|
31 |
</style>
|
32 |
-
</head>
|
33 |
-
|
34 |
-
<body>
|
35 |
<div id="main-container">
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
<
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
<
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
</
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
<
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
<
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
|
|
|
|
|
|
|
|
|
|
207 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
</div>
|
209 |
|
210 |
<script type="text/javascript" src="helpers.js"></script>
|
211 |
-
<script type=
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
}
|
217 |
-
})
|
218 |
-
// TODO: convert audio buffer to WAV
|
219 |
-
function setAudio(audio) {
|
220 |
-
//if (audio) {
|
221 |
-
// // convert to 16-bit PCM
|
222 |
-
// var blob = new Blob([audio], { type: 'audio/wav' });
|
223 |
-
// var url = URL.createObjectURL(blob);
|
224 |
-
// document.getElementById('source').src = url;
|
225 |
-
// document.getElementById('audio').hidden = false;
|
226 |
-
// document.getElementById('audio').loop = false;
|
227 |
-
// document.getElementById('audio').load();
|
228 |
-
//} else {
|
229 |
-
// document.getElementById('audio').hidden = true;
|
230 |
-
//}
|
231 |
}
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
}
|
244 |
|
245 |
-
|
246 |
-
print: printTextarea,
|
247 |
-
printErr: printTextarea,
|
248 |
-
setStatus: function (text) {
|
249 |
-
printTextarea('js: ' + text);
|
250 |
-
},
|
251 |
-
monitorRunDependencies: function (left) {
|
252 |
-
}
|
253 |
-
};
|
254 |
|
255 |
-
//
|
256 |
-
var context = null;
|
257 |
|
258 |
-
|
259 |
-
|
260 |
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
|
|
|
|
|
|
270 |
}
|
271 |
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
|
291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
|
293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
294 |
|
295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
}
|
297 |
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
}
|
303 |
|
304 |
-
|
305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
|
|
|
|
|
|
311 |
}
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
}
|
321 |
|
322 |
-
|
323 |
-
|
324 |
-
'tiny.en': 'models/ggml-tiny.en.bin',
|
325 |
-
'tiny': 'models/ggml-tiny.bin',
|
326 |
-
'base.en': 'models/ggml-base.en.bin',
|
327 |
-
'base': 'models/ggml-base.bin',
|
328 |
-
'extra': 'https://huggingface.co/radames/general/resolve/main/ggml-tiny.bin'
|
329 |
-
|
330 |
-
};
|
331 |
-
|
332 |
-
let sizes = {
|
333 |
-
'tiny.en': 75,
|
334 |
-
'tiny': 75,
|
335 |
-
'base.en': 142,
|
336 |
-
'base': 142,
|
337 |
-
};
|
338 |
|
339 |
-
|
340 |
-
|
341 |
-
let size_mb = sizes[model];
|
342 |
|
343 |
-
|
|
|
344 |
|
345 |
-
|
346 |
-
|
347 |
-
document.getElementById('fetch-whisper-tiny').style.display = 'none';
|
348 |
-
document.getElementById('fetch-whisper-base').style.display = 'none';
|
349 |
-
document.getElementById('whisper-file').style.display = 'none';
|
350 |
-
document.getElementById('model-whisper-status').innerHTML = 'loading model: ' + model;
|
351 |
-
|
352 |
-
cbProgress = function (p) {
|
353 |
-
let el = document.getElementById('fetch-whisper-progress');
|
354 |
-
el.innerHTML = Math.round(100 * p) + '%';
|
355 |
-
};
|
356 |
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
el = document.getElementById('model-whisper-status'); if (el) el.innerHTML = '';
|
365 |
};
|
|
|
|
|
|
|
366 |
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
//
|
371 |
-
// audio file
|
372 |
-
//
|
373 |
-
|
374 |
-
const kMaxAudio_s = 120;
|
375 |
-
const kSampleRate = 16000;
|
376 |
-
|
377 |
-
window.AudioContext = window.AudioContext || window.webkitAudioContext;
|
378 |
-
window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
|
379 |
-
|
380 |
-
function loadAudio(event) {
|
381 |
-
if (!context) {
|
382 |
-
context = new AudioContext({
|
383 |
-
sampleRate: kSampleRate,
|
384 |
-
channelCount: 1,
|
385 |
-
echoCancellation: false,
|
386 |
-
autoGainControl: true,
|
387 |
-
noiseSuppression: true,
|
388 |
-
});
|
389 |
-
}
|
390 |
-
|
391 |
-
var file = event.target.files[0] || null;
|
392 |
-
if (file == null) {
|
393 |
-
return;
|
394 |
-
}
|
395 |
-
|
396 |
-
printTextarea('js: loading audio: ' + file.name + ', size: ' + file.size + ' bytes');
|
397 |
-
printTextarea('js: please wait ...');
|
398 |
|
399 |
-
|
400 |
-
|
401 |
var buf = new Uint8Array(reader.result);
|
402 |
|
403 |
-
context.decodeAudioData(
|
404 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
405 |
var source = offlineContext.createBufferSource();
|
406 |
source.buffer = audioBuffer;
|
407 |
source.connect(offlineContext.destination);
|
408 |
source.start(0);
|
409 |
|
410 |
-
offlineContext
|
|
|
|
|
411 |
audio = renderedBuffer.getChannelData(0);
|
412 |
-
printTextarea(
|
|
|
|
|
413 |
|
414 |
// truncate to first 30 seconds
|
415 |
-
if (audio.length >
|
416 |
-
|
417 |
-
|
|
|
|
|
|
|
|
|
418 |
}
|
419 |
-
|
420 |
setAudio(audio);
|
421 |
-
|
422 |
-
|
423 |
-
|
|
|
424 |
audio = null;
|
425 |
setAudio(audio);
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
}
|
430 |
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
function
|
440 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
}
|
442 |
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
function startRecording() {
|
447 |
-
if (!context) {
|
448 |
-
context = new AudioContext({
|
449 |
-
sampleRate: kSampleRate,
|
450 |
-
channelCount: 1,
|
451 |
-
echoCancellation: false,
|
452 |
-
autoGainControl: true,
|
453 |
-
noiseSuppression: true,
|
454 |
-
});
|
455 |
-
}
|
456 |
-
|
457 |
-
document.getElementById('start').disabled = true;
|
458 |
-
document.getElementById('stop').disabled = false;
|
459 |
-
|
460 |
-
document.getElementById('progress-bar').style.width = '0%';
|
461 |
-
document.getElementById('progress-text').innerHTML = '0%';
|
462 |
-
|
463 |
-
doRecording = true;
|
464 |
-
startTime = Date.now();
|
465 |
-
|
466 |
-
var chunks = [];
|
467 |
-
var stream = null;
|
468 |
-
|
469 |
-
navigator.mediaDevices.getUserMedia({ audio: true, video: false })
|
470 |
-
.then(function (s) {
|
471 |
-
stream = s;
|
472 |
-
mediaRecorder = new MediaRecorder(stream);
|
473 |
-
mediaRecorder.ondataavailable = function (e) {
|
474 |
-
chunks.push(e.data);
|
475 |
-
};
|
476 |
-
mediaRecorder.onstop = function (e) {
|
477 |
-
var blob = new Blob(chunks, { 'type': 'audio/ogg; codecs=opus' });
|
478 |
-
chunks = [];
|
479 |
-
|
480 |
-
document.getElementById('start').disabled = false;
|
481 |
-
document.getElementById('stop').disabled = true;
|
482 |
-
|
483 |
-
var reader = new FileReader();
|
484 |
-
reader.onload = function (event) {
|
485 |
-
var buf = new Uint8Array(reader.result);
|
486 |
-
|
487 |
-
context.decodeAudioData(buf.buffer, function (audioBuffer) {
|
488 |
-
var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
|
489 |
-
var source = offlineContext.createBufferSource();
|
490 |
-
source.buffer = audioBuffer;
|
491 |
-
source.connect(offlineContext.destination);
|
492 |
-
source.start(0);
|
493 |
-
|
494 |
-
offlineContext.startRendering().then(function (renderedBuffer) {
|
495 |
-
audio = renderedBuffer.getChannelData(0);
|
496 |
-
printTextarea('js: audio recorded, size: ' + audio.length);
|
497 |
-
|
498 |
-
// truncate to first 30 seconds
|
499 |
-
if (audio.length > kMaxAudio_s * kSampleRate) {
|
500 |
-
audio = audio.slice(0, kMaxAudio_s * kSampleRate);
|
501 |
-
printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
|
502 |
-
}
|
503 |
-
setAudio(audio);
|
504 |
-
});
|
505 |
-
}, function (e) {
|
506 |
-
printTextarea('js: error decoding audio: ' + e);
|
507 |
-
audio = null;
|
508 |
-
setAudio(audio);
|
509 |
-
});
|
510 |
-
}
|
511 |
-
|
512 |
-
reader.readAsArrayBuffer(blob);
|
513 |
-
};
|
514 |
-
mediaRecorder.start();
|
515 |
-
})
|
516 |
-
.catch(function (err) {
|
517 |
-
printTextarea('js: error getting audio stream: ' + err);
|
518 |
-
});
|
519 |
-
|
520 |
-
var interval = setInterval(function () {
|
521 |
-
if (!doRecording) {
|
522 |
-
clearInterval(interval);
|
523 |
-
mediaRecorder.stop();
|
524 |
-
stream.getTracks().forEach(function (track) {
|
525 |
-
track.stop();
|
526 |
-
});
|
527 |
-
}
|
528 |
-
|
529 |
-
document.getElementById('progress-bar').style.width = (100 * (Date.now() - startTime) / 1000 / kMaxAudio_s) + '%';
|
530 |
-
document.getElementById('progress-text').innerHTML = (100 * (Date.now() - startTime) / 1000 / kMaxAudio_s).toFixed(0) + '%';
|
531 |
-
}, 1000);
|
532 |
-
|
533 |
-
printTextarea('js: recording ...');
|
534 |
-
|
535 |
-
setTimeout(function () {
|
536 |
-
if (doRecording) {
|
537 |
-
printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
|
538 |
-
stopRecording();
|
539 |
-
}
|
540 |
-
}, kMaxAudio_s * 1000);
|
541 |
}
|
542 |
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
function onProcess(translate) {
|
548 |
-
if (!instance) {
|
549 |
-
instance = Module.init('whisper.bin');
|
550 |
-
|
551 |
-
if (instance) {
|
552 |
-
printTextarea("js: whisper initialized, instance: " + instance);
|
553 |
-
document.getElementById('model').innerHTML = 'Model loaded: ' + model_whisper;
|
554 |
-
}
|
555 |
-
}
|
556 |
-
|
557 |
-
if (!instance) {
|
558 |
-
printTextarea("js: failed to initialize whisper");
|
559 |
-
return;
|
560 |
-
}
|
561 |
-
|
562 |
-
if (!audio) {
|
563 |
-
printTextarea("js: no audio data");
|
564 |
-
return;
|
565 |
-
}
|
566 |
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
|
|
|
|
|
|
|
|
579 |
}
|
|
|
580 |
}
|
|
|
581 |
</script>
|
582 |
<script type="text/javascript" src="main.js"></script>
|
583 |
-
</body>
|
584 |
-
|
585 |
-
</html>
|
|
|
1 |
<!doctype html>
|
2 |
<html lang="en-us">
|
3 |
+
<head>
|
|
|
4 |
<title>whisper.cpp : WASM example</title>
|
5 |
|
6 |
<style>
|
7 |
+
#output {
|
8 |
+
width: 100%;
|
9 |
+
height: 100%;
|
10 |
+
margin: 0 auto;
|
11 |
+
margin-top: 10px;
|
12 |
+
border-left: 0px;
|
13 |
+
border-right: 0px;
|
14 |
+
padding-left: 0px;
|
15 |
+
padding-right: 0px;
|
16 |
+
display: block;
|
17 |
+
background-color: black;
|
18 |
+
color: white;
|
19 |
+
font-size: 10px;
|
20 |
+
font-family: "Lucida Console", Monaco, monospace;
|
21 |
+
outline: none;
|
22 |
+
white-space: pre;
|
23 |
+
overflow-wrap: normal;
|
24 |
+
overflow-x: scroll;
|
25 |
+
}
|
|
|
|
|
|
|
|
|
26 |
</style>
|
27 |
+
</head>
|
28 |
+
<body>
|
|
|
29 |
<div id="main-container">
|
30 |
+
<div id="warning" style="display: none; padding: 1rem">
|
31 |
+
<b style="color: red; font-size: large"
|
32 |
+
>Warning: your browser does not support SharedArrayBuffer please try
|
33 |
+
open the page in a new tab.
|
34 |
+
</b>
|
35 |
+
<a href="https://radames-whisper-wasm.hf.space" target="_blank"
|
36 |
+
>https://radames-whisper-wasm.hf.space</a
|
37 |
+
>
|
38 |
+
</div>
|
39 |
+
<b
|
40 |
+
>Minimal
|
41 |
+
<a href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a>
|
42 |
+
example running fully in the browser</b
|
43 |
+
>
|
44 |
+
|
45 |
+
<br /><br />
|
46 |
+
|
47 |
+
Usage instructions:<br />
|
48 |
+
<ul>
|
49 |
+
<li>
|
50 |
+
Load a ggml model file (you can obtain one from
|
51 |
+
<a href="https://ggml.ggerganov.com/">here</a>, recommended:
|
52 |
+
<b>tiny</b> or <b>base</b>)
|
53 |
+
</li>
|
54 |
+
<li>
|
55 |
+
Select audio file to transcribe or record audio from the microphone
|
56 |
+
(sample: <a href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)
|
57 |
+
</li>
|
58 |
+
<li>Click on the "Transcribe" button to start the transcription</li>
|
59 |
+
</ul>
|
60 |
+
|
61 |
+
Note that the computation is quite heavy and may take a few seconds to
|
62 |
+
complete.<br />
|
63 |
+
The transcription results will be displayed in the text area below.<br /><br />
|
64 |
+
<b>Important:</b>
|
65 |
+
<ul>
|
66 |
+
<li>
|
67 |
+
your browser must support WASM SIMD instructions for this to work
|
68 |
+
</li>
|
69 |
+
<li>
|
70 |
+
Firefox cannot load files larger than 256 MB - use Chrome instead
|
71 |
+
</li>
|
72 |
+
</ul>
|
73 |
+
|
74 |
+
<b>More examples:</b>
|
75 |
+
<a href="https://whisper.ggerganov.com/">main</a> |
|
76 |
+
<a href="https://whisper.ggerganov.com/bench">bench</a> |
|
77 |
+
<a href="https://whisper.ggerganov.com/stream">stream</a> |
|
78 |
+
<a href="https://whisper.ggerganov.com/command">command</a> |
|
79 |
+
<a href="https://whisper.ggerganov.com/talk">talk</a> |
|
80 |
+
|
81 |
+
<hr />
|
82 |
+
|
83 |
+
<div id="model">
|
84 |
+
Whisper models: <span id="model-whisper-status"></span><br /><br />
|
85 |
+
<button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">
|
86 |
+
tiny.en (75 MB)
|
87 |
+
</button>
|
88 |
+
<button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">
|
89 |
+
tiny (75 MB)
|
90 |
+
</button>
|
91 |
+
<button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">
|
92 |
+
base.en (142 MB)
|
93 |
+
</button>
|
94 |
+
<button id="fetch-whisper-base" onclick="loadWhisper('base')">
|
95 |
+
base (142 MB)
|
96 |
+
</button>
|
97 |
+
<button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">
|
98 |
+
small.en (466 MB)
|
99 |
+
</button>
|
100 |
+
<button id="fetch-whisper-small" onclick="loadWhisper('small')">
|
101 |
+
small (466 MB)
|
102 |
+
</button>
|
103 |
+
<input
|
104 |
+
type="file"
|
105 |
+
id="whisper-file"
|
106 |
+
name="file"
|
107 |
+
onchange="loadFile(event, 'whisper.bin')"
|
108 |
+
/>
|
109 |
+
<br /><br />
|
110 |
+
Quantized models:<br /><br />
|
111 |
+
<button
|
112 |
+
id="fetch-whisper-tiny-en-q5_1"
|
113 |
+
onclick="loadWhisper('tiny-en-q5_1')"
|
114 |
+
>
|
115 |
+
tiny.en (Q5_1, 31 MB)
|
116 |
+
</button>
|
117 |
+
<button id="fetch-whisper-tiny-q5_1" onclick="loadWhisper('tiny-q5_1')">
|
118 |
+
tiny (Q5_1, 31 MB)
|
119 |
+
</button>
|
120 |
+
<button
|
121 |
+
id="fetch-whisper-base-en-q5_1"
|
122 |
+
onclick="loadWhisper('base-en-q5_1')"
|
123 |
+
>
|
124 |
+
base.en (Q5_1, 57 MB)
|
125 |
+
</button>
|
126 |
+
<button id="fetch-whisper-base-q5_1" onclick="loadWhisper('base-q5_1')">
|
127 |
+
base (Q5_1, 57 MB)
|
128 |
+
</button>
|
129 |
+
<button
|
130 |
+
id="fetch-whisper-small-en-q5_1"
|
131 |
+
onclick="loadWhisper('small-en-q5_1')"
|
132 |
+
>
|
133 |
+
small.en (Q5_1, 182 MB)
|
134 |
+
</button>
|
135 |
+
<button
|
136 |
+
id="fetch-whisper-small-q5_1"
|
137 |
+
onclick="loadWhisper('small-q5_1')"
|
138 |
+
>
|
139 |
+
small (Q5_1, 182 MB)</button
|
140 |
+
><br />
|
141 |
+
<button
|
142 |
+
id="fetch-whisper-medium-en-q5_0"
|
143 |
+
onclick="loadWhisper('medium-en-q5_0')"
|
144 |
+
>
|
145 |
+
medium.en (Q5_0, 515 MB)
|
146 |
+
</button>
|
147 |
+
<button
|
148 |
+
id="fetch-whisper-medium-q5_0"
|
149 |
+
onclick="loadWhisper('medium-q5_0')"
|
150 |
+
>
|
151 |
+
medium (Q5_0, 515 MB)
|
152 |
+
</button>
|
153 |
+
<button
|
154 |
+
id="fetch-whisper-large-q5_0"
|
155 |
+
onclick="loadWhisper('large-q5_0')"
|
156 |
+
>
|
157 |
+
large (Q5_0, 1030 MB)
|
158 |
+
</button>
|
159 |
+
<span id="fetch-whisper-progress"></span>
|
160 |
+
</div>
|
161 |
+
|
162 |
+
<br />
|
163 |
+
|
164 |
+
<!-- radio button to select between file upload or microphone -->
|
165 |
+
<div id="input">
|
166 |
+
Input:
|
167 |
+
<input
|
168 |
+
type="radio"
|
169 |
+
id="file"
|
170 |
+
name="input"
|
171 |
+
value="file"
|
172 |
+
checked="checked"
|
173 |
+
onchange="changeInput('file')"
|
174 |
+
/>
|
175 |
+
<label for="file">File</label>
|
176 |
+
<input
|
177 |
+
type="radio"
|
178 |
+
id="mic"
|
179 |
+
name="input"
|
180 |
+
value="mic"
|
181 |
+
onchange="changeInput('mic')"
|
182 |
+
/>
|
183 |
+
<label for="mic">Microphone</label>
|
184 |
+
</div>
|
185 |
+
|
186 |
+
<br />
|
187 |
+
|
188 |
+
<div id="input_file">
|
189 |
+
Audio file:
|
190 |
+
<input type="file" id="file" name="file" onchange="loadAudio(event)" />
|
191 |
+
</div>
|
192 |
+
|
193 |
+
<div id="input_mic" style="display: none">
|
194 |
+
Microphone:
|
195 |
+
<button id="start" onclick="startRecording()">Start</button>
|
196 |
+
<button id="stop" onclick="stopRecording()" disabled>Stop</button>
|
197 |
+
|
198 |
+
<!-- progress bar to show recording progress -->
|
199 |
+
<br /><br />
|
200 |
+
<div id="progress" style="display: none">
|
201 |
+
<div
|
202 |
+
id="progress-bar"
|
203 |
+
style="width: 0%; height: 10px; background-color: #4caf50"
|
204 |
+
></div>
|
205 |
+
<div id="progress-text">0%</div>
|
206 |
</div>
|
207 |
+
</div>
|
208 |
+
|
209 |
+
<audio controls="controls" id="audio" loop hidden>
|
210 |
+
Your browser does not support the <audio> tag.
|
211 |
+
<source id="source" src="" type="audio/wav" />
|
212 |
+
</audio>
|
213 |
+
|
214 |
+
<hr />
|
215 |
+
<br />
|
216 |
+
|
217 |
+
<table>
|
218 |
+
<tr>
|
219 |
+
<td>
|
220 |
+
Language:
|
221 |
+
<select id="language" name="language">
|
222 |
+
<option value="en">English</option>
|
223 |
+
<option value="ar">Arabic</option>
|
224 |
+
<option value="hy">Armenian</option>
|
225 |
+
<option value="az">Azerbaijani</option>
|
226 |
+
<option value="eu">Basque</option>
|
227 |
+
<option value="be">Belarusian</option>
|
228 |
+
<option value="bn">Bengali</option>
|
229 |
+
<option value="bg">Bulgarian</option>
|
230 |
+
<option value="ca">Catalan</option>
|
231 |
+
<option value="zh">Chinese</option>
|
232 |
+
<option value="hr">Croatian</option>
|
233 |
+
<option value="cs">Czech</option>
|
234 |
+
<option value="da">Danish</option>
|
235 |
+
<option value="nl">Dutch</option>
|
236 |
+
<option value="en">English</option>
|
237 |
+
<option value="et">Estonian</option>
|
238 |
+
<option value="tl">Filipino</option>
|
239 |
+
<option value="fi">Finnish</option>
|
240 |
+
<option value="fr">French</option>
|
241 |
+
<option value="gl">Galician</option>
|
242 |
+
<option value="ka">Georgian</option>
|
243 |
+
<option value="de">German</option>
|
244 |
+
<option value="el">Greek</option>
|
245 |
+
<option value="gu">Gujarati</option>
|
246 |
+
<option value="iw">Hebrew</option>
|
247 |
+
<option value="hi">Hindi</option>
|
248 |
+
<option value="hu">Hungarian</option>
|
249 |
+
<option value="is">Icelandic</option>
|
250 |
+
<option value="id">Indonesian</option>
|
251 |
+
<option value="ga">Irish</option>
|
252 |
+
<option value="it">Italian</option>
|
253 |
+
<option value="ja">Japanese</option>
|
254 |
+
<option value="kn">Kannada</option>
|
255 |
+
<option value="ko">Korean</option>
|
256 |
+
<option value="la">Latin</option>
|
257 |
+
<option value="lv">Latvian</option>
|
258 |
+
<option value="lt">Lithuanian</option>
|
259 |
+
<option value="mk">Macedonian</option>
|
260 |
+
<option value="ms">Malay</option>
|
261 |
+
<option value="mt">Maltese</option>
|
262 |
+
<option value="no">Norwegian</option>
|
263 |
+
<option value="fa">Persian</option>
|
264 |
+
<option value="pl">Polish</option>
|
265 |
+
<option value="pt">Portuguese</option>
|
266 |
+
<option value="ro">Romanian</option>
|
267 |
+
<option value="ru">Russian</option>
|
268 |
+
<option value="sr">Serbian</option>
|
269 |
+
<option value="sk">Slovak</option>
|
270 |
+
<option value="sl">Slovenian</option>
|
271 |
+
<option value="es">Spanish</option>
|
272 |
+
<option value="sw">Swahili</option>
|
273 |
+
<option value="sv">Swedish</option>
|
274 |
+
<option value="ta">Tamil</option>
|
275 |
+
<option value="te">Telugu</option>
|
276 |
+
<option value="th">Thai</option>
|
277 |
+
<option value="tr">Turkish</option>
|
278 |
+
<option value="uk">Ukrainian</option>
|
279 |
+
<option value="ur">Urdu</option>
|
280 |
+
<option value="vi">Vietnamese</option>
|
281 |
+
<option value="cy">Welsh</option>
|
282 |
+
<option value="yi">Yiddish</option>
|
283 |
+
</select>
|
284 |
+
</td>
|
285 |
+
<!-- Slider to select number of threads between 1 and 16 -->
|
286 |
+
<td>
|
287 |
+
Threads:
|
288 |
+
<input
|
289 |
+
type="range"
|
290 |
+
id="threads"
|
291 |
+
name="threads"
|
292 |
+
min="1"
|
293 |
+
max="16"
|
294 |
+
value="8"
|
295 |
+
onchange="changeThreads(this.value)"
|
296 |
+
/>
|
297 |
+
<span id="threads-value">8</span>
|
298 |
+
</td>
|
299 |
+
<td>
|
300 |
+
<button onclick="onProcess(false);">Transcribe</button>
|
301 |
+
</td>
|
302 |
+
<td>
|
303 |
+
<button onclick="onProcess(true);">Translate</button>
|
304 |
+
</td>
|
305 |
+
</tr>
|
306 |
+
</table>
|
307 |
+
|
308 |
+
<br />
|
309 |
+
|
310 |
+
<!-- textarea with height filling the rest of the page -->
|
311 |
+
<textarea id="output" rows="20"></textarea>
|
312 |
+
|
313 |
+
<br /><br />
|
314 |
+
|
315 |
+
<div class="cell-version">
|
316 |
+
<span>
|
317 |
+
| Build time: <span class="nav-link">@GIT_DATE@</span> | Commit hash:
|
318 |
+
<a
|
319 |
+
class="nav-link"
|
320 |
+
href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@"
|
321 |
+
>@GIT_SHA1@</a
|
322 |
+
>
|
323 |
+
| Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
|
324 |
+
<a
|
325 |
+
class="nav-link"
|
326 |
+
href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm"
|
327 |
+
>Source Code</a
|
328 |
+
>
|
329 |
+
|
|
330 |
+
</span>
|
331 |
+
</div>
|
332 |
</div>
|
333 |
|
334 |
<script type="text/javascript" src="helpers.js"></script>
|
335 |
+
<script type="text/javascript">
|
336 |
+
document.addEventListener("DOMContentLoaded", () => {
|
337 |
+
//check is shared array buffer is supported
|
338 |
+
if (!window.SharedArrayBuffer) {
|
339 |
+
document.querySelector("#warning").style.display = "block";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
340 |
}
|
341 |
+
});
|
342 |
+
// TODO: convert audio buffer to WAV
|
343 |
+
function setAudio(audio) {
|
344 |
+
//if (audio) {
|
345 |
+
// // convert to 16-bit PCM
|
346 |
+
// var blob = new Blob([audio], { type: 'audio/wav' });
|
347 |
+
// var url = URL.createObjectURL(blob);
|
348 |
+
// document.getElementById('source').src = url;
|
349 |
+
// document.getElementById('audio').hidden = false;
|
350 |
+
// document.getElementById('audio').loop = false;
|
351 |
+
// document.getElementById('audio').load();
|
352 |
+
//} else {
|
353 |
+
// document.getElementById('audio').hidden = true;
|
354 |
+
//}
|
355 |
+
}
|
356 |
+
|
357 |
+
function changeInput(input) {
|
358 |
+
if (input == "file") {
|
359 |
+
document.getElementById("input_file").style.display = "block";
|
360 |
+
document.getElementById("input_mic").style.display = "none";
|
361 |
+
document.getElementById("progress").style.display = "none";
|
362 |
+
} else {
|
363 |
+
document.getElementById("input_file").style.display = "none";
|
364 |
+
document.getElementById("input_mic").style.display = "block";
|
365 |
+
document.getElementById("progress").style.display = "block";
|
366 |
+
}
|
367 |
+
}
|
368 |
+
|
369 |
+
var Module = {
|
370 |
+
print: printTextarea,
|
371 |
+
printErr: printTextarea,
|
372 |
+
setStatus: function (text) {
|
373 |
+
printTextarea("js: " + text);
|
374 |
+
},
|
375 |
+
monitorRunDependencies: function (left) {},
|
376 |
+
};
|
377 |
+
|
378 |
+
// web audio context
|
379 |
+
var context = null;
|
380 |
+
|
381 |
+
// audio data
|
382 |
+
var audio = null;
|
383 |
+
|
384 |
+
// the whisper instance
|
385 |
+
var instance = null;
|
386 |
+
var model_whisper = "";
|
387 |
+
|
388 |
+
// helper function
|
389 |
+
function convertTypedArray(src, type) {
|
390 |
+
var buffer = new ArrayBuffer(src.byteLength);
|
391 |
+
var baseView = new src.constructor(buffer).set(src);
|
392 |
+
return new type(buffer);
|
393 |
+
}
|
394 |
+
|
395 |
+
//
|
396 |
+
// load model
|
397 |
+
//
|
398 |
+
|
399 |
+
let dbVersion = 1;
|
400 |
+
let dbName = "whisper.ggerganov.com";
|
401 |
+
let indexedDB =
|
402 |
+
window.indexedDB ||
|
403 |
+
window.mozIndexedDB ||
|
404 |
+
window.webkitIndexedDB ||
|
405 |
+
window.msIndexedDB;
|
406 |
+
|
407 |
+
function storeFS(fname, buf) {
|
408 |
+
// write to WASM file using FS_createDataFile
|
409 |
+
// if the file exists, delete it
|
410 |
+
try {
|
411 |
+
Module.FS_unlink(fname);
|
412 |
+
} catch (e) {
|
413 |
+
// ignore
|
414 |
}
|
415 |
|
416 |
+
Module.FS_createDataFile("/", fname, buf, true, true);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
|
418 |
+
//model_whisper = fname;
|
|
|
419 |
|
420 |
+
document.getElementById("model-whisper-status").innerHTML =
|
421 |
+
'loaded "' + model_whisper + '"!';
|
422 |
|
423 |
+
printTextarea(
|
424 |
+
"storeFS: stored model: " + fname + " size: " + buf.length
|
425 |
+
);
|
426 |
|
427 |
+
document.getElementById("model").innerHTML =
|
428 |
+
"Model fetched: " + model_whisper;
|
429 |
+
}
|
430 |
+
|
431 |
+
function loadFile(event, fname) {
|
432 |
+
var file = event.target.files[0] || null;
|
433 |
+
if (file == null) {
|
434 |
+
return;
|
435 |
}
|
436 |
|
437 |
+
printTextarea(
|
438 |
+
"loadFile: loading model: " +
|
439 |
+
file.name +
|
440 |
+
", size: " +
|
441 |
+
file.size +
|
442 |
+
" bytes"
|
443 |
+
);
|
444 |
+
printTextarea("loadFile: please wait ...");
|
445 |
+
|
446 |
+
var reader = new FileReader();
|
447 |
+
reader.onload = function (event) {
|
448 |
+
var buf = new Uint8Array(reader.result);
|
449 |
+
storeFS(fname, buf);
|
450 |
+
};
|
451 |
+
reader.readAsArrayBuffer(file);
|
452 |
+
|
453 |
+
document.getElementById("fetch-whisper-tiny-en").style.display = "none";
|
454 |
+
document.getElementById("fetch-whisper-base-en").style.display = "none";
|
455 |
+
document.getElementById("fetch-whisper-small-en").style.display =
|
456 |
+
"none";
|
457 |
+
document.getElementById("fetch-whisper-tiny").style.display = "none";
|
458 |
+
document.getElementById("fetch-whisper-base").style.display = "none";
|
459 |
+
document.getElementById("fetch-whisper-small").style.display = "none";
|
460 |
+
|
461 |
+
document.getElementById("fetch-whisper-tiny-en-q5_1").style.display =
|
462 |
+
"none";
|
463 |
+
document.getElementById("fetch-whisper-tiny-q5_1").style.display =
|
464 |
+
"none";
|
465 |
+
document.getElementById("fetch-whisper-base-en-q5_1").style.display =
|
466 |
+
"none";
|
467 |
+
document.getElementById("fetch-whisper-base-q5_1").style.display =
|
468 |
+
"none";
|
469 |
+
document.getElementById("fetch-whisper-small-en-q5_1").style.display =
|
470 |
+
"none";
|
471 |
+
document.getElementById("fetch-whisper-small-q5_1").style.display =
|
472 |
+
"none";
|
473 |
+
document.getElementById("fetch-whisper-medium-en-q5_0").style.display =
|
474 |
+
"none";
|
475 |
+
document.getElementById("fetch-whisper-medium-q5_0").style.display =
|
476 |
+
"none";
|
477 |
+
document.getElementById("fetch-whisper-large-q5_0").style.display =
|
478 |
+
"none";
|
479 |
+
|
480 |
+
document.getElementById("whisper-file").style.display = "none";
|
481 |
+
document.getElementById("model-whisper-status").innerHTML =
|
482 |
+
"loaded model: " + file.name;
|
483 |
+
}
|
484 |
+
|
485 |
+
// Start downloading one of the predefined whisper models.
//
// model - key of the model to fetch (for example "tiny.en" or "base-q5_1").
//
// The download itself is delegated to loadRemote, which receives the URL, the
// destination name "whisper.bin", the model size in MB, a progress callback,
// storeFS, a cancel callback and printTextarea. Before the download starts the
// model buttons and the file picker are hidden and the status line shows the
// model name; the cancel callback shows them again and clears the status.
// Unknown model keys are reported through printTextarea and change nothing.
function loadWhisper(model) {
  // NOTE(review): Hugging Face "resolve" URLs normally contain a revision
  // segment (".../resolve/main/<file>") - confirm that these URLs download.
  const baseURL = "https://huggingface.co/ggerganov/whisper.cpp/resolve";

  // model key -> [file name stem, size in MB handed to loadRemote]
  const models = {
    "tiny.en": ["tiny.en", 75],
    tiny: ["tiny", 75],
    "base.en": ["base.en", 142],
    base: ["base", 142],
    "small.en": ["small.en", 466],
    small: ["small", 466],

    "tiny-en-q5_1": ["tiny.en-q5_1", 31],
    "tiny-q5_1": ["tiny-q5_1", 31],
    "base-en-q5_1": ["base.en-q5_1", 57],
    "base-q5_1": ["base-q5_1", 57],
    "small-en-q5_1": ["small.en-q5_1", 182],
    "small-q5_1": ["small-q5_1", 182],
    "medium-en-q5_0": ["medium.en-q5_0", 515],
    "medium-q5_0": ["medium-q5_0", 515],
    "large-q5_0": ["large-q5_0", 1030],
  };

  // Reject unknown keys before touching the page, so a typo cannot hide the
  // controls and start a download of "undefined".
  if (!Object.prototype.hasOwnProperty.call(models, model)) {
    printTextarea("js: unknown whisper model: " + model);
    return;
  }

  // Every model has a button whose id is "fetch-whisper-" + key, with the dot
  // of the ".en" keys written as a dash; the file picker is toggled with them.
  const controlIds = Object.keys(models)
    .map(function (key) {
      return "fetch-whisper-" + key.replace(".", "-");
    })
    .concat("whisper-file");

  // Show or hide all model controls; elements missing from the page are skipped.
  function setControlsDisplay(value) {
    controlIds.forEach(function (id) {
      const el = document.getElementById(id);
      if (el) el.style.display = value;
    });
  }

  // Replace the text of the model status line, if it exists.
  function setStatus(text) {
    const el = document.getElementById("model-whisper-status");
    if (el) el.innerHTML = text;
  }

  const url = `${baseURL}/ggml-model-whisper-${models[model][0]}.bin`;
  const dst = "whisper.bin";
  const size_mb = models[model][1];

  model_whisper = model;

  setControlsDisplay("none");
  setStatus("loading model: " + model);

  // p is a fraction; it is displayed as a rounded percentage.
  const cbProgress = function (p) {
    const el = document.getElementById("fetch-whisper-progress");
    if (el) el.innerHTML = Math.round(100 * p) + "%";
  };

  const cbCancel = function () {
    setControlsDisplay("inline-block");
    setStatus("");
  };

  loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
}
|
618 |
+
|
619 |
+
//
|
620 |
+
// audio file
|
621 |
+
//
|
622 |
+
|
623 |
+
// Longest audio file that is kept, in seconds (30 minutes).
const kMaxAudio_s = 60 * 30;
// Longest microphone recording, in seconds (2 minutes).
const kMaxRecording_s = 60 * 2;
// Sample rate requested for the AudioContext and used for the length limits.
const kSampleRate = 16000;

// Fall back to the webkit-prefixed constructors where the standard names are
// not available.
if (!window.AudioContext) {
  window.AudioContext = window.webkitAudioContext;
}
if (!window.OfflineAudioContext) {
  window.OfflineAudioContext = window.webkitOfflineAudioContext;
}
|
630 |
+
|
631 |
+
// Load the audio file chosen in a file input and hand its samples to setAudio.
//
// event - change event of the file input; only event.target.files[0] is used.
//
// The file is read into memory, decoded with the shared AudioContext, rendered
// through an OfflineAudioContext and channel 0 of the result is stored in the
// global `audio`. Audio longer than kMaxAudio_s seconds is truncated. On a read,
// decode or render failure the error is printed and setAudio(null) is called.
function loadAudio(event) {
  if (!context) {
    context = new AudioContext({
      sampleRate: kSampleRate,
      channelCount: 1,
      echoCancellation: false,
      autoGainControl: true,
      noiseSuppression: true,
    });
  }

  var file = event.target.files[0] || null;
  if (file == null) {
    return;
  }

  printTextarea(
    "js: loading audio: " + file.name + ", size: " + file.size + " bytes"
  );
  printTextarea("js: please wait ...");

  // Shared failure path: report the problem and clear the current audio.
  function fail(message, err) {
    printTextarea(message + err);
    audio = null;
    setAudio(audio);
  }

  function onDecodeError(e) {
    fail("js: error decoding audio: ", e);
  }

  var reader = new FileReader();

  reader.onload = function () {
    context.decodeAudioData(
      reader.result,
      function (audioBuffer) {
        var offlineContext = new OfflineAudioContext(
          audioBuffer.numberOfChannels,
          audioBuffer.length,
          audioBuffer.sampleRate
        );
        var source = offlineContext.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(offlineContext.destination);
        source.start(0);

        // The second argument handles a rejected render only; errors thrown
        // by the success handler itself are not relabelled as decode errors.
        offlineContext.startRendering().then(function (renderedBuffer) {
          audio = renderedBuffer.getChannelData(0);
          printTextarea("js: audio loaded, size: " + audio.length);

          // truncate to the first kMaxAudio_s seconds
          if (audio.length > kMaxAudio_s * kSampleRate) {
            audio = audio.slice(0, kMaxAudio_s * kSampleRate);
            printTextarea(
              "js: truncated audio to first " + kMaxAudio_s + " seconds"
            );
          }

          setAudio(audio);
        }, onDecodeError);
      },
      onDecodeError
    );
  };

  // Without this a failed read would leave the user at "please wait ...".
  reader.onerror = function () {
    fail("js: error reading audio file: ", reader.error);
  };

  reader.readAsArrayBuffer(file);
}
|
693 |
+
|
694 |
+
//
|
695 |
+
// microphone
|
696 |
+
//
|
697 |
+
|
698 |
+
// Most recently created MediaRecorder, or null before the first recording.
var mediaRecorder = null;
// True while a recording is wanted; cleared by stopRecording().
var doRecording = false;
// Date.now() timestamp of the start of the current recording.
var startTime = 0;

// Ask the running recording to stop. The recorder itself is stopped by the
// polling timer inside startRecording(), within about one second.
function stopRecording() {
  doRecording = false;
}

// Record up to kMaxRecording_s seconds of audio from the microphone.
//
// Every 1000 ms the progress display is updated and doRecording is checked;
// once it is false the recorder and the microphone stream are stopped. The
// recorded data is then decoded, channel 0 is stored in the global `audio`
// (truncated to kMaxRecording_s seconds) and passed to setAudio.
//
// If microphone access fails, or stop is requested before access is granted,
// the timers are cancelled and the start/stop buttons are reset.
function startRecording() {
  if (!context) {
    context = new AudioContext({
      sampleRate: kSampleRate,
      channelCount: 1,
      echoCancellation: false,
      autoGainControl: true,
      noiseSuppression: true,
    });
  }

  document.getElementById("start").disabled = true;
  document.getElementById("stop").disabled = false;

  document.getElementById("progress-bar").style.width = "0%";
  document.getElementById("progress-text").innerHTML = "0%";

  doRecording = true;
  startTime = Date.now();

  var chunks = [];
  var stream = null;
  var recorder = null;
  var interval = null;
  var timeout = null;
  // Set once this recording session has been shut down.
  var finished = false;

  function resetButtons() {
    document.getElementById("start").disabled = false;
    document.getElementById("stop").disabled = true;
  }

  function releaseStream() {
    if (stream) {
      stream.getTracks().forEach(function (track) {
        track.stop();
      });
    }
  }

  // Cancel both timers so that neither can act on a later recording.
  function endSession() {
    finished = true;
    clearInterval(interval);
    clearTimeout(timeout);
  }

  function onDecodeError(e) {
    printTextarea("js: error decoding audio: " + e);
    audio = null;
    setAudio(audio);
  }

  navigator.mediaDevices
    .getUserMedia({ audio: true, video: false })
    .then(function (s) {
      stream = s;

      if (finished) {
        // Stop was requested while waiting for microphone access: do not
        // start a recorder that nothing would ever stop.
        releaseStream();
        resetButtons();
        return;
      }

      recorder = new MediaRecorder(stream);
      mediaRecorder = recorder;

      recorder.ondataavailable = function (e) {
        chunks.push(e.data);
      };

      recorder.onstop = function () {
        var blob = new Blob(chunks, { type: "audio/ogg; codecs=opus" });
        chunks = [];

        resetButtons();

        var reader = new FileReader();
        reader.onload = function () {
          context.decodeAudioData(
            reader.result,
            function (audioBuffer) {
              var offlineContext = new OfflineAudioContext(
                audioBuffer.numberOfChannels,
                audioBuffer.length,
                audioBuffer.sampleRate
              );
              var source = offlineContext.createBufferSource();
              source.buffer = audioBuffer;
              source.connect(offlineContext.destination);
              source.start(0);

              offlineContext.startRendering().then(function (renderedBuffer) {
                audio = renderedBuffer.getChannelData(0);
                printTextarea("js: audio recorded, size: " + audio.length);

                // truncate to the first kMaxRecording_s seconds
                if (audio.length > kMaxRecording_s * kSampleRate) {
                  audio = audio.slice(0, kMaxRecording_s * kSampleRate);
                  printTextarea(
                    "js: truncated audio to first " +
                      kMaxRecording_s +
                      " seconds"
                  );
                }

                setAudio(audio);
              }, onDecodeError);
            },
            onDecodeError
          );
        };
        reader.onerror = function () {
          printTextarea("js: error reading recorded audio: " + reader.error);
          audio = null;
          setAudio(audio);
        };

        reader.readAsArrayBuffer(blob);
      };

      recorder.start();
    })
    .catch(function (err) {
      printTextarea("js: error getting audio stream: " + err);

      // Nothing is being recorded: leave the page ready for another attempt.
      doRecording = false;
      endSession();
      releaseStream();
      resetButtons();
    });

  interval = setInterval(function () {
    if (!doRecording) {
      endSession();
      // The recorder does not exist yet if access is still pending; in that
      // case the getUserMedia handlers finish the cleanup.
      if (recorder && recorder.state !== "inactive") {
        recorder.stop();
      }
      releaseStream();
    }

    var percent = Math.min(
      100,
      (100 * (Date.now() - startTime)) / 1000 / kMaxRecording_s
    );
    document.getElementById("progress-bar").style.width = percent + "%";
    document.getElementById("progress-text").innerHTML =
      percent.toFixed(0) + "%";
  }, 1000);

  printTextarea("js: recording ...");

  timeout = setTimeout(function () {
    if (doRecording) {
      printTextarea(
        "js: recording stopped after " + kMaxRecording_s + " seconds"
      );
      stopRecording();
    }
  }, kMaxRecording_s * 1000);
}
|
828 |
+
|
829 |
+
//
|
830 |
+
// transcribe
|
831 |
+
//
|
832 |
+
|
833 |
+
// Number of threads passed to Module.full_default by onProcess.
var nthreads = 8;

// Update the thread count from a UI control and refresh the displayed value.
//
// value - new thread count; may be a number or a numeric string such as the
//         `value` of an input element. It is stored as an integer so that
//         nthreads keeps the numeric type it is initialised with. Values that
//         do not parse to a positive integer are ignored.
function changeThreads(value) {
  var parsed = Number.parseInt(value, 10);
  if (Number.isInteger(parsed) && parsed >= 1) {
    nthreads = parsed;
  }
  document.getElementById("threads-value").innerHTML = nthreads;
}
|
839 |
+
|
840 |
+
// Run whisper on the current audio.
//
// translate - forwarded unchanged as the last argument of Module.full_default.
//
// The whisper instance is created from "whisper.bin" on first use. Nothing is
// processed when initialisation fails or when no audio has been set; both
// cases are reported through printTextarea.
function onProcess(translate) {
  if (!instance) {
    instance = Module.init("whisper.bin");

    if (!instance) {
      printTextarea("js: failed to initialize whisper");
      return;
    }

    printTextarea("js: whisper initialized, instance: " + instance);
    document.getElementById("model").innerHTML =
      "Model loaded: " + model_whisper;
  }

  if (!audio) {
    printTextarea("js: no audio data");
    return;
  }

  ["", "js: processing - this might take a while ...", ""].forEach(
    function (line) {
      printTextarea(line);
    }
  );

  // The call is deferred by 100 ms - presumably so the messages above can be
  // displayed before the processing starts (TODO confirm).
  setTimeout(function () {
    const language = document.getElementById("language").value;
    const ret = Module.full_default(
      instance,
      audio,
      language,
      nthreads,
      translate
    );

    console.log("js: full_default returned: " + ret);
    if (ret) {
      printTextarea("js: whisper returned: " + ret);
    }
  }, 100);
}
|
881 |
</script>
|
882 |
<script type="text/javascript" src="main.js"></script>
|
883 |
+
</body>
|
884 |
+
</html>
|
|