radames committed on
Commit
87ab4d8
·
1 Parent(s): 534060f

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +822 -523
index.html CHANGED
@@ -1,585 +1,884 @@
1
  <!doctype html>
2
  <html lang="en-us">
3
-
4
- <head>
5
  <title>whisper.cpp : WASM example</title>
6
 
7
  <style>
8
- body {
9
- background-color: white;
10
- }
11
-
12
- #output {
13
- width: 100%;
14
- height: 100%;
15
- margin: 0 auto;
16
- margin-top: 10px;
17
- border-left: 0px;
18
- border-right: 0px;
19
- padding-left: 0px;
20
- padding-right: 0px;
21
- display: block;
22
- background-color: black;
23
- color: white;
24
- font-size: 10px;
25
- font-family: 'Lucida Console', Monaco, monospace;
26
- outline: none;
27
- white-space: pre;
28
- overflow-wrap: normal;
29
- overflow-x: scroll;
30
- }
31
  </style>
32
- </head>
33
-
34
- <body>
35
  <div id="main-container">
36
- <div id="warning" style="display: none; padding: 1rem;">
37
- <b style="color: red ; font-size: large;">Warning: your browser does not support SharedArrayBuffer please
38
- try open the page in a new tab.
39
- </b> <a href="https://radames-whisper-wasm.hf.space"
40
- target="_blank">https://radames-whisper-wasm.hf.space</a>
41
- </div>
42
- <b>Minimal <a target="_blank" href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a> example running
43
- fully in the browser</b>
44
-
45
- <br><br>
46
-
47
- Usage instructions:<br>
48
- <ul>
49
- <li>Load a ggml model file (you can obtain one from <a target="_blank"
50
- href="https://ggml.ggerganov.com/">here</a>, recommended: <b>tiny</b> or <b>base</b>)</li>
51
- <li>Select audio file to transcribe or record audio from the microphone (sample: <a target="_blank"
52
- href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)</li>
53
- <li>Click on the "Transcribe" button to start the transcription</li>
54
- </ul>
55
-
56
- Note that the computation is quite heavy and may take a few seconds to complete.<br>
57
- The transcription results will be displayed in the text area below.<br><br>
58
- <b>Important: your browser must support WASM SIMD instructions for this to work.</b>
59
-
60
- <br><br>
61
- <hr>
62
-
63
- <div id="model">
64
- Whisper model: <span id="model-whisper-status"></span>
65
- <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
66
- <button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button>
67
- <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
68
- <button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
69
- <button id="fetch-whisper-extra" onclick="loadWhisper('extra')">EXTRA</button>
70
- <span id="fetch-whisper-progress"></span>
71
-
72
- <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
73
- </div>
74
-
75
- <br>
76
-
77
- <!-- radio button to select between file upload or microphone -->
78
- <div id="input">
79
- Input:
80
- <input type="radio" id="file" name="input" value="file" checked="checked" onchange="changeInput('file')" />
81
- File
82
- <input type="radio" id="mic" name="input" value="mic" onchange="changeInput('mic')" /> Microphone
83
- </div>
84
-
85
- <br>
86
-
87
- <div id="input_file">
88
- Audio file:
89
- <input type="file" id="file" name="file" onchange="loadAudio(event)" />
90
- </div>
91
-
92
- <div id="input_mic" style="display: none;">
93
- Microphone:
94
- <button id="start" onclick="startRecording()">Start</button>
95
- <button id="stop" onclick="stopRecording()" disabled>Stop</button>
96
-
97
- <!-- progress bar to show recording progress -->
98
- <br><br>
99
- <div id="progress" style="display: none;">
100
- <div id="progress-bar" style="width: 0%; height: 10px; background-color: #4CAF50;"></div>
101
- <div id="progress-text">0%</div>
102
- </div>
103
- </div>
104
-
105
- <audio controls="controls" id="audio" loop hidden>
106
- Your browser does not support the &lt;audio&gt; tag.
107
- <source id="source" src="" type="audio/wav" />
108
- </audio>
109
-
110
- <hr><br>
111
-
112
- <table>
113
- <tr>
114
- <td>
115
- Language:
116
- <select id="language" name="language">
117
- <option value="en">English</option>
118
- <option value="ar">Arabic</option>
119
- <option value="hy">Armenian</option>
120
- <option value="az">Azerbaijani</option>
121
- <option value="eu">Basque</option>
122
- <option value="be">Belarusian</option>
123
- <option value="bn">Bengali</option>
124
- <option value="bg">Bulgarian</option>
125
- <option value="ca">Catalan</option>
126
- <option value="zh">Chinese</option>
127
- <option value="hr">Croatian</option>
128
- <option value="cs">Czech</option>
129
- <option value="da">Danish</option>
130
- <option value="nl">Dutch</option>
131
- <option value="en">English</option>
132
- <option value="et">Estonian</option>
133
- <option value="tl">Filipino</option>
134
- <option value="fi">Finnish</option>
135
- <option value="fr">French</option>
136
- <option value="gl">Galician</option>
137
- <option value="ka">Georgian</option>
138
- <option value="de">German</option>
139
- <option value="el">Greek</option>
140
- <option value="gu">Gujarati</option>
141
- <option value="iw">Hebrew</option>
142
- <option value="hi">Hindi</option>
143
- <option value="hu">Hungarian</option>
144
- <option value="is">Icelandic</option>
145
- <option value="id">Indonesian</option>
146
- <option value="ga">Irish</option>
147
- <option value="it">Italian</option>
148
- <option value="ja">Japanese</option>
149
- <option value="kn">Kannada</option>
150
- <option value="ko">Korean</option>
151
- <option value="la">Latin</option>
152
- <option value="lv">Latvian</option>
153
- <option value="lt">Lithuanian</option>
154
- <option value="mk">Macedonian</option>
155
- <option value="ms">Malay</option>
156
- <option value="mt">Maltese</option>
157
- <option value="no">Norwegian</option>
158
- <option value="fa">Persian</option>
159
- <option value="pl">Polish</option>
160
- <option value="pt">Portuguese</option>
161
- <option value="ro">Romanian</option>
162
- <option value="ru">Russian</option>
163
- <option value="sr">Serbian</option>
164
- <option value="sk">Slovak</option>
165
- <option value="sl">Slovenian</option>
166
- <option value="es">Spanish</option>
167
- <option value="sw">Swahili</option>
168
- <option value="sv">Swedish</option>
169
- <option value="ta">Tamil</option>
170
- <option value="te">Telugu</option>
171
- <option value="th">Thai</option>
172
- <option value="tr">Turkish</option>
173
- <option value="uk">Ukrainian</option>
174
- <option value="ur">Urdu</option>
175
- <option value="vi">Vietnamese</option>
176
- <option value="cy">Welsh</option>
177
- <option value="yi">Yiddish</option>
178
- </select>
179
- </td>
180
- <td>
181
- <button onclick="onProcess(false);">Transcribe</button>
182
- </td>
183
- <td>
184
- <button onclick="onProcess(true);">Translate</button>
185
- </td>
186
- </tr>
187
- </table>
188
-
189
- <br>
190
-
191
- <!-- textarea with height filling the rest of the page -->
192
- <textarea id="output" rows="20"></textarea>
193
-
194
- <br><br>
195
-
196
- <div class="cell-version">
197
- <span>
198
- |
199
- Build time: <span class="nav-link">Mon Jan 16 11:57:35 2023</span> |
200
- Commit hash: <a class="nav-link" target="_blank"
201
- href="https://github.com/ggerganov/whisper.cpp/commit/49b529ba">49b529ba</a> |
202
- Commit subject: <span class="nav-link">whisper.android : add support for loading directly from asset in
203
- C (#415)</span> |
204
- <a class="nav-link" target="_blank"
205
- href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm">Source Code</a> |
206
- </span>
 
 
 
 
 
207
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  </div>
209
 
210
  <script type="text/javascript" src="helpers.js"></script>
211
- <script type='text/javascript'>
212
- document.addEventListener("DOMContentLoaded", () => {
213
- // check whether SharedArrayBuffer is supported
214
- if (!window.SharedArrayBuffer) {
215
- document.querySelector("#warning").style.display = "block";
216
- }
217
- })
218
- // TODO: convert audio buffer to WAV
219
- function setAudio(audio) {
220
- //if (audio) {
221
- // // convert to 16-bit PCM
222
- // var blob = new Blob([audio], { type: 'audio/wav' });
223
- // var url = URL.createObjectURL(blob);
224
- // document.getElementById('source').src = url;
225
- // document.getElementById('audio').hidden = false;
226
- // document.getElementById('audio').loop = false;
227
- // document.getElementById('audio').load();
228
- //} else {
229
- // document.getElementById('audio').hidden = true;
230
- //}
231
  }
232
-
233
- function changeInput(input) {
234
- if (input == 'file') {
235
- document.getElementById('input_file').style.display = 'block';
236
- document.getElementById('input_mic').style.display = 'none';
237
- document.getElementById('progress').style.display = 'none';
238
- } else {
239
- document.getElementById('input_file').style.display = 'none';
240
- document.getElementById('input_mic').style.display = 'block';
241
- document.getElementById('progress').style.display = 'block';
242
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  }
244
 
245
- var Module = {
246
- print: printTextarea,
247
- printErr: printTextarea,
248
- setStatus: function (text) {
249
- printTextarea('js: ' + text);
250
- },
251
- monitorRunDependencies: function (left) {
252
- }
253
- };
254
 
255
- // web audio context
256
- var context = null;
257
 
258
- // audio data
259
- var audio = null;
260
 
261
- // the whisper instance
262
- var instance = null;
263
- var model_whisper = '';
264
 
265
- // helper function
266
- function convertTypedArray(src, type) {
267
- var buffer = new ArrayBuffer(src.byteLength);
268
- var baseView = new src.constructor(buffer).set(src);
269
- return new type(buffer);
 
 
 
270
  }
271
 
272
- //
273
- // load model
274
- //
275
-
276
- let dbVersion = 1
277
- let dbName = 'whisper.ggerganov.com';
278
- let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
279
-
280
- function storeFS(fname, buf) {
281
- // write to WASM file using FS_createDataFile
282
- // if the file exists, delete it
283
- try {
284
- Module.FS_unlink(fname);
285
- } catch (e) {
286
- // ignore
287
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
- Module.FS_createDataFile("/", fname, buf, true, true);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
- model_whisper = fname;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
- document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
- printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  }
297
 
298
- function loadFile(event, fname) {
299
- var file = event.target.files[0] || null;
300
- if (file == null) {
301
- return;
302
- }
303
 
304
- printTextarea("loadFile: loading model: " + file.name + ", size: " + file.size + " bytes");
305
- printTextarea('loadFile: please wait ...');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
- var reader = new FileReader();
308
- reader.onload = function (event) {
309
- var buf = new Uint8Array(reader.result);
310
- storeFS(fname, buf);
 
 
 
311
  }
312
- reader.readAsArrayBuffer(file);
313
-
314
- document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
315
- document.getElementById('fetch-whisper-base-en').style.display = 'none';
316
- document.getElementById('fetch-whisper-tiny').style.display = 'none';
317
- document.getElementById('fetch-whisper-base').style.display = 'none';
318
- document.getElementById('whisper-file').style.display = 'none';
319
- document.getElementById('model-whisper-status').innerHTML = 'loaded model: ' + file.name;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  }
321
 
322
- function loadWhisper(model) {
323
- let urls = {
324
- 'tiny.en': 'models/ggml-tiny.en.bin',
325
- 'tiny': 'models/ggml-tiny.bin',
326
- 'base.en': 'models/ggml-base.en.bin',
327
- 'base': 'models/ggml-base.bin',
328
- 'extra': 'https://huggingface.co/radames/general/resolve/main/ggml-tiny.bin'
329
-
330
- };
331
-
332
- let sizes = {
333
- 'tiny.en': 75,
334
- 'tiny': 75,
335
- 'base.en': 142,
336
- 'base': 142,
337
- };
338
 
339
- let url = urls[model];
340
- let dst = 'whisper.bin';
341
- let size_mb = sizes[model];
342
 
343
- model_whisper = model;
 
344
 
345
- document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
346
- document.getElementById('fetch-whisper-base-en').style.display = 'none';
347
- document.getElementById('fetch-whisper-tiny').style.display = 'none';
348
- document.getElementById('fetch-whisper-base').style.display = 'none';
349
- document.getElementById('whisper-file').style.display = 'none';
350
- document.getElementById('model-whisper-status').innerHTML = 'loading model: ' + model;
351
-
352
- cbProgress = function (p) {
353
- let el = document.getElementById('fetch-whisper-progress');
354
- el.innerHTML = Math.round(100 * p) + '%';
355
- };
356
 
357
- cbCancel = function () {
358
- var el;
359
- el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
360
- el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
361
- el = document.getElementById('fetch-whisper-tiny'); if (el) el.style.display = 'inline-block';
362
- el = document.getElementById('fetch-whisper-base'); if (el) el.style.display = 'inline-block';
363
- el = document.getElementById('whisper-file'); if (el) el.style.display = 'inline-block';
364
- el = document.getElementById('model-whisper-status'); if (el) el.innerHTML = '';
365
  };
 
 
 
366
 
367
- loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
368
- }
369
-
370
- //
371
- // audio file
372
- //
373
-
374
- const kMaxAudio_s = 120;
375
- const kSampleRate = 16000;
376
-
377
- window.AudioContext = window.AudioContext || window.webkitAudioContext;
378
- window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
379
-
380
- function loadAudio(event) {
381
- if (!context) {
382
- context = new AudioContext({
383
- sampleRate: kSampleRate,
384
- channelCount: 1,
385
- echoCancellation: false,
386
- autoGainControl: true,
387
- noiseSuppression: true,
388
- });
389
- }
390
-
391
- var file = event.target.files[0] || null;
392
- if (file == null) {
393
- return;
394
- }
395
-
396
- printTextarea('js: loading audio: ' + file.name + ', size: ' + file.size + ' bytes');
397
- printTextarea('js: please wait ...');
398
 
399
- var reader = new FileReader();
400
- reader.onload = function (event) {
401
  var buf = new Uint8Array(reader.result);
402
 
403
- context.decodeAudioData(buf.buffer, function (audioBuffer) {
404
- var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
 
 
 
 
 
 
405
  var source = offlineContext.createBufferSource();
406
  source.buffer = audioBuffer;
407
  source.connect(offlineContext.destination);
408
  source.start(0);
409
 
410
- offlineContext.startRendering().then(function (renderedBuffer) {
 
 
411
  audio = renderedBuffer.getChannelData(0);
412
- printTextarea('js: audio loaded, size: ' + audio.length);
 
 
413
 
414
  // truncate to the first kMaxAudio_s seconds
415
- if (audio.length > kMaxAudio_s * kSampleRate) {
416
- audio = audio.slice(0, kMaxAudio_s * kSampleRate);
417
- printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
 
 
 
 
418
  }
419
-
420
  setAudio(audio);
421
- });
422
- }, function (e) {
423
- printTextarea('js: error decoding audio: ' + e);
 
424
  audio = null;
425
  setAudio(audio);
426
- });
427
- }
428
- reader.readAsArrayBuffer(file);
429
- }
430
 
431
- //
432
- // microphone
433
- //
434
-
435
- var mediaRecorder = null;
436
- var doRecording = false;
437
- var startTime = 0;
438
-
439
- function stopRecording() {
440
- doRecording = false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  }
442
 
443
- // record up to kMaxAudio_s seconds of audio from the microphone
444
- // check if doRecording is false every 1000 ms and stop recording if so
445
- // update progress information
446
- function startRecording() {
447
- if (!context) {
448
- context = new AudioContext({
449
- sampleRate: kSampleRate,
450
- channelCount: 1,
451
- echoCancellation: false,
452
- autoGainControl: true,
453
- noiseSuppression: true,
454
- });
455
- }
456
-
457
- document.getElementById('start').disabled = true;
458
- document.getElementById('stop').disabled = false;
459
-
460
- document.getElementById('progress-bar').style.width = '0%';
461
- document.getElementById('progress-text').innerHTML = '0%';
462
-
463
- doRecording = true;
464
- startTime = Date.now();
465
-
466
- var chunks = [];
467
- var stream = null;
468
-
469
- navigator.mediaDevices.getUserMedia({ audio: true, video: false })
470
- .then(function (s) {
471
- stream = s;
472
- mediaRecorder = new MediaRecorder(stream);
473
- mediaRecorder.ondataavailable = function (e) {
474
- chunks.push(e.data);
475
- };
476
- mediaRecorder.onstop = function (e) {
477
- var blob = new Blob(chunks, { 'type': 'audio/ogg; codecs=opus' });
478
- chunks = [];
479
-
480
- document.getElementById('start').disabled = false;
481
- document.getElementById('stop').disabled = true;
482
-
483
- var reader = new FileReader();
484
- reader.onload = function (event) {
485
- var buf = new Uint8Array(reader.result);
486
-
487
- context.decodeAudioData(buf.buffer, function (audioBuffer) {
488
- var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
489
- var source = offlineContext.createBufferSource();
490
- source.buffer = audioBuffer;
491
- source.connect(offlineContext.destination);
492
- source.start(0);
493
-
494
- offlineContext.startRendering().then(function (renderedBuffer) {
495
- audio = renderedBuffer.getChannelData(0);
496
- printTextarea('js: audio recorded, size: ' + audio.length);
497
-
498
- // truncate to the first kMaxAudio_s seconds
499
- if (audio.length > kMaxAudio_s * kSampleRate) {
500
- audio = audio.slice(0, kMaxAudio_s * kSampleRate);
501
- printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
502
- }
503
- setAudio(audio);
504
- });
505
- }, function (e) {
506
- printTextarea('js: error decoding audio: ' + e);
507
- audio = null;
508
- setAudio(audio);
509
- });
510
- }
511
-
512
- reader.readAsArrayBuffer(blob);
513
- };
514
- mediaRecorder.start();
515
- })
516
- .catch(function (err) {
517
- printTextarea('js: error getting audio stream: ' + err);
518
- });
519
-
520
- var interval = setInterval(function () {
521
- if (!doRecording) {
522
- clearInterval(interval);
523
- mediaRecorder.stop();
524
- stream.getTracks().forEach(function (track) {
525
- track.stop();
526
- });
527
- }
528
-
529
- document.getElementById('progress-bar').style.width = (100 * (Date.now() - startTime) / 1000 / kMaxAudio_s) + '%';
530
- document.getElementById('progress-text').innerHTML = (100 * (Date.now() - startTime) / 1000 / kMaxAudio_s).toFixed(0) + '%';
531
- }, 1000);
532
-
533
- printTextarea('js: recording ...');
534
-
535
- setTimeout(function () {
536
- if (doRecording) {
537
- printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
538
- stopRecording();
539
- }
540
- }, kMaxAudio_s * 1000);
541
  }
542
 
543
- //
544
- // transcribe
545
- //
546
-
547
- function onProcess(translate) {
548
- if (!instance) {
549
- instance = Module.init('whisper.bin');
550
-
551
- if (instance) {
552
- printTextarea("js: whisper initialized, instance: " + instance);
553
- document.getElementById('model').innerHTML = 'Model loaded: ' + model_whisper;
554
- }
555
- }
556
-
557
- if (!instance) {
558
- printTextarea("js: failed to initialize whisper");
559
- return;
560
- }
561
-
562
- if (!audio) {
563
- printTextarea("js: no audio data");
564
- return;
565
- }
566
 
567
- if (instance) {
568
- printTextarea('');
569
- printTextarea('js: processing - this might take a while ...');
570
- printTextarea('');
571
-
572
- setTimeout(function () {
573
- var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate);
574
- console.log('js: full_default returned: ' + ret);
575
- if (ret) {
576
- printTextarea("js: whisper returned: " + ret);
577
- }
578
- }, 100);
 
 
 
 
579
  }
 
580
  }
 
581
  </script>
582
  <script type="text/javascript" src="main.js"></script>
583
- </body>
584
-
585
- </html>
 
1
  <!doctype html>
2
  <html lang="en-us">
3
+ <head>
 
4
  <title>whisper.cpp : WASM example</title>
5
 
6
  <style>
7
+ #output {
8
+ width: 100%;
9
+ height: 100%;
10
+ margin: 0 auto;
11
+ margin-top: 10px;
12
+ border-left: 0px;
13
+ border-right: 0px;
14
+ padding-left: 0px;
15
+ padding-right: 0px;
16
+ display: block;
17
+ background-color: black;
18
+ color: white;
19
+ font-size: 10px;
20
+ font-family: "Lucida Console", Monaco, monospace;
21
+ outline: none;
22
+ white-space: pre;
23
+ overflow-wrap: normal;
24
+ overflow-x: scroll;
25
+ }
 
 
 
 
26
  </style>
27
+ </head>
28
+ <body>
 
29
  <div id="main-container">
30
+ <div id="warning" style="display: none; padding: 1rem">
31
+ <b style="color: red; font-size: large"
32
+ >Warning: your browser does not support SharedArrayBuffer. Please try
33
+ opening the page in a new tab.
34
+ </b>
35
+ <a href="https://radames-whisper-wasm.hf.space" target="_blank"
36
+ >https://radames-whisper-wasm.hf.space</a
37
+ >
38
+ </div>
39
+ <b
40
+ >Minimal
41
+ <a href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a>
42
+ example running fully in the browser</b
43
+ >
44
+
45
+ <br /><br />
46
+
47
+ Usage instructions:<br />
48
+ <ul>
49
+ <li>
50
+ Load a ggml model file (you can obtain one from
51
+ <a href="https://ggml.ggerganov.com/">here</a>, recommended:
52
+ <b>tiny</b> or <b>base</b>)
53
+ </li>
54
+ <li>
55
+ Select audio file to transcribe or record audio from the microphone
56
+ (sample: <a href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)
57
+ </li>
58
+ <li>Click on the "Transcribe" button to start the transcription</li>
59
+ </ul>
60
+
61
+ Note that the computation is quite heavy and may take a few seconds to
62
+ complete.<br />
63
+ The transcription results will be displayed in the text area below.<br /><br />
64
+ <b>Important:</b>
65
+ <ul>
66
+ <li>
67
+ your browser must support WASM SIMD instructions for this to work
68
+ </li>
69
+ <li>
70
+ Firefox cannot load files larger than 256 MB - use Chrome instead
71
+ </li>
72
+ </ul>
73
+
74
+ <b>More examples:</b>
75
+ <a href="https://whisper.ggerganov.com/">main</a> |
76
+ <a href="https://whisper.ggerganov.com/bench">bench</a> |
77
+ <a href="https://whisper.ggerganov.com/stream">stream</a> |
78
+ <a href="https://whisper.ggerganov.com/command">command</a> |
79
+ <a href="https://whisper.ggerganov.com/talk">talk</a> |
80
+
81
+ <hr />
82
+
83
+ <div id="model">
84
+ Whisper models: <span id="model-whisper-status"></span><br /><br />
85
+ <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">
86
+ tiny.en (75 MB)
87
+ </button>
88
+ <button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">
89
+ tiny (75 MB)
90
+ </button>
91
+ <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">
92
+ base.en (142 MB)
93
+ </button>
94
+ <button id="fetch-whisper-base" onclick="loadWhisper('base')">
95
+ base (142 MB)
96
+ </button>
97
+ <button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">
98
+ small.en (466 MB)
99
+ </button>
100
+ <button id="fetch-whisper-small" onclick="loadWhisper('small')">
101
+ small (466 MB)
102
+ </button>
103
+ <input
104
+ type="file"
105
+ id="whisper-file"
106
+ name="file"
107
+ onchange="loadFile(event, 'whisper.bin')"
108
+ />
109
+ <br /><br />
110
+ Quantized models:<br /><br />
111
+ <button
112
+ id="fetch-whisper-tiny-en-q5_1"
113
+ onclick="loadWhisper('tiny-en-q5_1')"
114
+ >
115
+ tiny.en (Q5_1, 31 MB)
116
+ </button>
117
+ <button id="fetch-whisper-tiny-q5_1" onclick="loadWhisper('tiny-q5_1')">
118
+ tiny (Q5_1, 31 MB)
119
+ </button>
120
+ <button
121
+ id="fetch-whisper-base-en-q5_1"
122
+ onclick="loadWhisper('base-en-q5_1')"
123
+ >
124
+ base.en (Q5_1, 57 MB)
125
+ </button>
126
+ <button id="fetch-whisper-base-q5_1" onclick="loadWhisper('base-q5_1')">
127
+ base (Q5_1, 57 MB)
128
+ </button>
129
+ <button
130
+ id="fetch-whisper-small-en-q5_1"
131
+ onclick="loadWhisper('small-en-q5_1')"
132
+ >
133
+ small.en (Q5_1, 182 MB)
134
+ </button>
135
+ <button
136
+ id="fetch-whisper-small-q5_1"
137
+ onclick="loadWhisper('small-q5_1')"
138
+ >
139
+ small (Q5_1, 182 MB)</button
140
+ ><br />
141
+ <button
142
+ id="fetch-whisper-medium-en-q5_0"
143
+ onclick="loadWhisper('medium-en-q5_0')"
144
+ >
145
+ medium.en (Q5_0, 515 MB)
146
+ </button>
147
+ <button
148
+ id="fetch-whisper-medium-q5_0"
149
+ onclick="loadWhisper('medium-q5_0')"
150
+ >
151
+ medium (Q5_0, 515 MB)
152
+ </button>
153
+ <button
154
+ id="fetch-whisper-large-q5_0"
155
+ onclick="loadWhisper('large-q5_0')"
156
+ >
157
+ large (Q5_0, 1030 MB)
158
+ </button>
159
+ <span id="fetch-whisper-progress"></span>
160
+ </div>
161
+
162
+ <br />
163
+
164
+ <!-- radio button to select between file upload or microphone -->
165
+ <div id="input">
166
+ Input:
167
+ <input
168
+ type="radio"
169
+ id="file"
170
+ name="input"
171
+ value="file"
172
+ checked="checked"
173
+ onchange="changeInput('file')"
174
+ />
175
+ <label for="file">File</label>
176
+ <input
177
+ type="radio"
178
+ id="mic"
179
+ name="input"
180
+ value="mic"
181
+ onchange="changeInput('mic')"
182
+ />
183
+ <label for="mic">Microphone</label>
184
+ </div>
185
+
186
+ <br />
187
+
188
+ <div id="input_file">
189
+ Audio file:
190
+ <input type="file" id="file" name="file" onchange="loadAudio(event)" />
191
+ </div>
192
+
193
+ <div id="input_mic" style="display: none">
194
+ Microphone:
195
+ <button id="start" onclick="startRecording()">Start</button>
196
+ <button id="stop" onclick="stopRecording()" disabled>Stop</button>
197
+
198
+ <!-- progress bar to show recording progress -->
199
+ <br /><br />
200
+ <div id="progress" style="display: none">
201
+ <div
202
+ id="progress-bar"
203
+ style="width: 0%; height: 10px; background-color: #4caf50"
204
+ ></div>
205
+ <div id="progress-text">0%</div>
206
  </div>
207
+ </div>
208
+
209
+ <audio controls="controls" id="audio" loop hidden>
210
+ Your browser does not support the &lt;audio&gt; tag.
211
+ <source id="source" src="" type="audio/wav" />
212
+ </audio>
213
+
214
+ <hr />
215
+ <br />
216
+
217
+ <table>
218
+ <tr>
219
+ <td>
220
+ Language:
221
+ <select id="language" name="language">
222
+ <option value="en">English</option>
223
+ <option value="ar">Arabic</option>
224
+ <option value="hy">Armenian</option>
225
+ <option value="az">Azerbaijani</option>
226
+ <option value="eu">Basque</option>
227
+ <option value="be">Belarusian</option>
228
+ <option value="bn">Bengali</option>
229
+ <option value="bg">Bulgarian</option>
230
+ <option value="ca">Catalan</option>
231
+ <option value="zh">Chinese</option>
232
+ <option value="hr">Croatian</option>
233
+ <option value="cs">Czech</option>
234
+ <option value="da">Danish</option>
235
+ <option value="nl">Dutch</option>
236
+ <option value="en">English</option>
237
+ <option value="et">Estonian</option>
238
+ <option value="tl">Filipino</option>
239
+ <option value="fi">Finnish</option>
240
+ <option value="fr">French</option>
241
+ <option value="gl">Galician</option>
242
+ <option value="ka">Georgian</option>
243
+ <option value="de">German</option>
244
+ <option value="el">Greek</option>
245
+ <option value="gu">Gujarati</option>
246
+ <option value="iw">Hebrew</option>
247
+ <option value="hi">Hindi</option>
248
+ <option value="hu">Hungarian</option>
249
+ <option value="is">Icelandic</option>
250
+ <option value="id">Indonesian</option>
251
+ <option value="ga">Irish</option>
252
+ <option value="it">Italian</option>
253
+ <option value="ja">Japanese</option>
254
+ <option value="kn">Kannada</option>
255
+ <option value="ko">Korean</option>
256
+ <option value="la">Latin</option>
257
+ <option value="lv">Latvian</option>
258
+ <option value="lt">Lithuanian</option>
259
+ <option value="mk">Macedonian</option>
260
+ <option value="ms">Malay</option>
261
+ <option value="mt">Maltese</option>
262
+ <option value="no">Norwegian</option>
263
+ <option value="fa">Persian</option>
264
+ <option value="pl">Polish</option>
265
+ <option value="pt">Portuguese</option>
266
+ <option value="ro">Romanian</option>
267
+ <option value="ru">Russian</option>
268
+ <option value="sr">Serbian</option>
269
+ <option value="sk">Slovak</option>
270
+ <option value="sl">Slovenian</option>
271
+ <option value="es">Spanish</option>
272
+ <option value="sw">Swahili</option>
273
+ <option value="sv">Swedish</option>
274
+ <option value="ta">Tamil</option>
275
+ <option value="te">Telugu</option>
276
+ <option value="th">Thai</option>
277
+ <option value="tr">Turkish</option>
278
+ <option value="uk">Ukrainian</option>
279
+ <option value="ur">Urdu</option>
280
+ <option value="vi">Vietnamese</option>
281
+ <option value="cy">Welsh</option>
282
+ <option value="yi">Yiddish</option>
283
+ </select>
284
+ </td>
285
+ <!-- Slider to select number of threads between 1 and 16 -->
286
+ <td>
287
+ Threads:
288
+ <input
289
+ type="range"
290
+ id="threads"
291
+ name="threads"
292
+ min="1"
293
+ max="16"
294
+ value="8"
295
+ onchange="changeThreads(this.value)"
296
+ />
297
+ <span id="threads-value">8</span>
298
+ </td>
299
+ <td>
300
+ <button onclick="onProcess(false);">Transcribe</button>
301
+ </td>
302
+ <td>
303
+ <button onclick="onProcess(true);">Translate</button>
304
+ </td>
305
+ </tr>
306
+ </table>
307
+
308
+ <br />
309
+
310
+ <!-- textarea with height filling the rest of the page -->
311
+ <textarea id="output" rows="20"></textarea>
312
+
313
+ <br /><br />
314
+
315
+ <div class="cell-version">
316
+ <span>
317
+ | Build time: <span class="nav-link">@GIT_DATE@</span> | Commit hash:
318
+ <a
319
+ class="nav-link"
320
+ href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@"
321
+ >@GIT_SHA1@</a
322
+ >
323
+ | Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
324
+ <a
325
+ class="nav-link"
326
+ href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm"
327
+ >Source Code</a
328
+ >
329
+ |
330
+ </span>
331
+ </div>
332
  </div>
333
 
334
  <script type="text/javascript" src="helpers.js"></script>
335
+ <script type="text/javascript">
336
+ document.addEventListener("DOMContentLoaded", () => {
337
+ // check whether SharedArrayBuffer is supported
338
+ if (!window.SharedArrayBuffer) {
339
+ document.querySelector("#warning").style.display = "block";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  }
341
+ });
342
// Hook invoked after audio has been decoded (with the sample data) or after
// decoding failed (with null). Currently a no-op.
//
// TODO: convert the audio buffer to 16-bit PCM WAV, expose it through the
// 'source' / 'audio' elements (audio/wav blob URL, loop disabled, then load()),
// and hide the 'audio' element when there is no audio.
function setAudio(audio) {
  // intentionally empty
}
356
+
357
// Switch the visible input controls.
//   input - "file" shows the file picker and hides the microphone controls
//           and the progress display; any other value does the opposite.
function changeInput(input) {
  const fileMode = input == "file";
  const shownWhen = (visible) => (visible ? "block" : "none");

  document.getElementById("input_file").style.display = shownWhen(fileMode);
  document.getElementById("input_mic").style.display = shownWhen(!fileMode);
  document.getElementById("progress").style.display = shownWhen(!fileMode);
}
368
+
369
// Module configuration object: all output (stdout, stderr and status
// messages) is routed to the on-page text area.
// NOTE(review): presumably consumed by main.js, which is loaded after this script — confirm.
var Module = {
  print: printTextarea,
  printErr: printTextarea,
  // status messages are prefixed with "js: "
  setStatus: function (text) {
    printTextarea("js: " + text);
  },
  // run-dependency notifications are ignored
  monitorRunDependencies: function (left) {},
};

// Web Audio context, created lazily on first use
var context = null;

// decoded audio samples of the current file / recording (null when none)
var audio = null;

// whisper instance returned by Module.init (null until initialized)
var instance = null;

// name of the model selected through loadWhisper ("" when none)
var model_whisper = "";
387
+
388
// Copy the contents of a typed array into a fresh ArrayBuffer and return a
// view of that copy as another typed-array type.
//   src  - source typed array (only its own elements are copied, even when it
//          is a sub-view of a larger buffer)
//   type - typed-array constructor used for the returned view
// Returns a new `type` instance backed by the copy; `src` is left untouched.
function convertTypedArray(src, type) {
  var buffer = new ArrayBuffer(src.byteLength);
  // set() returns undefined, so its result is deliberately not kept
  new src.constructor(buffer).set(src);
  return new type(buffer);
}
394
+
395
+ //
396
+ // load model
397
+ //
398
+
399
// IndexedDB settings: database version and name
let dbVersion = 1;
let dbName = "whisper.ggerganov.com";

// Use the standard IndexedDB entry point, falling back to the
// vendor-prefixed variants where the unprefixed one is missing.
let indexedDB =
  window.indexedDB ||
  window.mozIndexedDB ||
  window.webkitIndexedDB ||
  window.msIndexedDB;
406
+
407
// Store a model in the WASM filesystem and update the status display.
//   fname - file name to create in the root directory ("/")
//   buf   - model bytes
function storeFS(fname, buf) {
  // write to WASM file using FS_createDataFile
  // if the file exists, delete it
  try {
    Module.FS_unlink(fname);
  } catch (e) {
    // ignore: unlink is best-effort before the file is (re)created
  }

  Module.FS_createDataFile("/", fname, buf, true, true);

  // model_whisper is only set by loadWhisper; for a model loaded through
  // loadFile it is still empty, so fall back to the stored file name
  // instead of displaying an empty name.
  var displayName = model_whisper || fname;

  document.getElementById("model-whisper-status").innerHTML =
    'loaded "' + displayName + '"!';

  printTextarea("storeFS: stored model: " + fname + " size: " + buf.length);

  document.getElementById("model").innerHTML = "Model fetched: " + displayName;
}
430
+
431
// Load a model from a user-selected local file and store it in the WASM
// filesystem through storeFS.
//   event - change event of the file input; only the first file is used,
//           and nothing happens when no file was selected
//   fname - destination file name handed to storeFS
function loadFile(event, fname) {
  var file = event.target.files[0] || null;
  if (file == null) {
    return;
  }

  printTextarea(
    "loadFile: loading model: " + file.name + ", size: " + file.size + " bytes"
  );
  printTextarea("loadFile: please wait ...");

  var reader = new FileReader();
  reader.onload = function () {
    storeFS(fname, new Uint8Array(reader.result));
  };
  reader.onerror = function () {
    // report read failures instead of failing silently
    printTextarea(
      "loadFile: failed to read " + file.name + ": " + reader.error
    );
  };
  reader.readAsArrayBuffer(file);

  // hide every model selection control; lookups are guarded (as in
  // loadWhisper's cancel callback) so a missing element does not abort
  // the status update below
  [
    "fetch-whisper-tiny-en",
    "fetch-whisper-base-en",
    "fetch-whisper-small-en",
    "fetch-whisper-tiny",
    "fetch-whisper-base",
    "fetch-whisper-small",
    "fetch-whisper-tiny-en-q5_1",
    "fetch-whisper-tiny-q5_1",
    "fetch-whisper-base-en-q5_1",
    "fetch-whisper-base-q5_1",
    "fetch-whisper-small-en-q5_1",
    "fetch-whisper-small-q5_1",
    "fetch-whisper-medium-en-q5_0",
    "fetch-whisper-medium-q5_0",
    "fetch-whisper-large-q5_0",
    "whisper-file",
  ].forEach(function (id) {
    var el = document.getElementById(id);
    if (el) el.style.display = "none";
  });

  document.getElementById("model-whisper-status").innerHTML =
    "loaded model: " + file.name;
}
484
+
485
// Fetch one of the predefined whisper models and store it as "whisper.bin"
// through loadRemote / storeFS.
//   model - key of the model to fetch (e.g. "tiny.en", "base-q5_1");
//           unknown keys are reported and ignored
function loadWhisper(model) {
  // NOTE(review): Hugging Face "resolve" URLs normally contain a revision
  // segment (e.g. /resolve/main/<file>) — confirm these URLs are reachable.
  const baseURL = "https://huggingface.co/ggerganov/whisper.cpp/resolve";
  let urls = {
    "tiny.en": `${baseURL}/ggml-model-whisper-tiny.en.bin`,
    tiny: `${baseURL}/ggml-model-whisper-tiny.bin`,
    "base.en": `${baseURL}/ggml-model-whisper-base.en.bin`,
    base: `${baseURL}/ggml-model-whisper-base.bin`,
    "small.en": `${baseURL}/ggml-model-whisper-small.en.bin`,
    small: `${baseURL}/ggml-model-whisper-small.bin`,

    "tiny-en-q5_1": `${baseURL}/ggml-model-whisper-tiny.en-q5_1.bin`,
    "tiny-q5_1": `${baseURL}/ggml-model-whisper-tiny-q5_1.bin`,
    "base-en-q5_1": `${baseURL}/ggml-model-whisper-base.en-q5_1.bin`,
    "base-q5_1": `${baseURL}/ggml-model-whisper-base-q5_1.bin`,
    "small-en-q5_1": `${baseURL}/ggml-model-whisper-small.en-q5_1.bin`,
    "small-q5_1": `${baseURL}/ggml-model-whisper-small-q5_1.bin`,
    "medium-en-q5_0": `${baseURL}/ggml-model-whisper-medium.en-q5_0.bin`,
    "medium-q5_0": `${baseURL}/ggml-model-whisper-medium-q5_0.bin`,
    "large-q5_0": `${baseURL}/ggml-model-whisper-large-q5_0.bin`,
  };

  // size value handed to loadRemote for each model (variable name suggests MB)
  let sizes = {
    "tiny.en": 75,
    tiny: 75,
    "base.en": 142,
    base: 142,
    "small.en": 466,
    small: 466,

    "tiny-en-q5_1": 31,
    "tiny-q5_1": 31,
    "base-en-q5_1": 57,
    "base-q5_1": 57,
    "small-en-q5_1": 182,
    "small-q5_1": 182,
    "medium-en-q5_0": 515,
    "medium-q5_0": 515,
    "large-q5_0": 1030,
  };

  let url = urls[model];
  let dst = "whisper.bin";
  let size_mb = sizes[model];

  // refuse unknown models instead of starting a download of "undefined"
  if (!url) {
    printTextarea("loadWhisper: unknown model: " + model);
    return;
  }

  model_whisper = model;

  // every control that lets the user pick a model
  const selectorIds = [
    "fetch-whisper-tiny-en",
    "fetch-whisper-base-en",
    "fetch-whisper-small-en",
    "fetch-whisper-tiny",
    "fetch-whisper-base",
    "fetch-whisper-small",
    "fetch-whisper-tiny-en-q5_1",
    "fetch-whisper-tiny-q5_1",
    "fetch-whisper-base-en-q5_1",
    "fetch-whisper-base-q5_1",
    "fetch-whisper-small-en-q5_1",
    "fetch-whisper-small-q5_1",
    "fetch-whisper-medium-en-q5_0",
    "fetch-whisper-medium-q5_0",
    "fetch-whisper-large-q5_0",
    "whisper-file",
  ];

  // set the display style of all selection controls that exist in the page
  const setSelectorsDisplay = function (display) {
    selectorIds.forEach(function (id) {
      const el = document.getElementById(id);
      if (el) el.style.display = display;
    });
  };

  // hide the selection controls while the download is running
  setSelectorsDisplay("none");
  document.getElementById("model-whisper-status").innerHTML =
    "loading model: " + model;

  // progress callback: p is a fraction, shown as a rounded percentage
  // (declared locally so it no longer leaks as an implicit global)
  const cbProgress = function (p) {
    let el = document.getElementById("fetch-whisper-progress");
    el.innerHTML = Math.round(100 * p) + "%";
  };

  // cancel callback: bring the selection controls back and clear the status
  const cbCancel = function () {
    setSelectorsDisplay("inline-block");
    const el = document.getElementById("model-whisper-status");
    if (el) el.innerHTML = "";
  };

  loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
}
618
+
619
+ //
620
+ // audio file
621
+ //
622
+
623
// longest audio file kept after decoding, in seconds (30 minutes)
const kMaxAudio_s = 30 * 60;
// longest microphone recording, in seconds (2 minutes)
const kMaxRecording_s = 2 * 60;
// sample rate (Hz) requested for the AudioContext; also used to convert
// the limits above into sample counts
const kSampleRate = 16000;

// fall back to the webkit-prefixed constructors where the standard ones are missing
window.AudioContext = window.AudioContext || window.webkitAudioContext;
window.OfflineAudioContext =
  window.OfflineAudioContext || window.webkitOfflineAudioContext;
630
+
631
// Decode a user-selected audio file into the global `audio` sample array.
//   event - change event of the file input; only the first file is used,
//           and nothing happens when no file was selected
// On success `audio` holds channel 0 of the rendered buffer, truncated to
// kMaxAudio_s seconds; on any failure `audio` is reset to null. setAudio is
// called in both cases.
function loadAudio(event) {
  if (!context) {
    // NOTE(review): only `sampleRate` appears to be an AudioContext option;
    // the other keys look like media-track constraints — confirm.
    context = new AudioContext({
      sampleRate: kSampleRate,
      channelCount: 1,
      echoCancellation: false,
      autoGainControl: true,
      noiseSuppression: true,
    });
  }

  var file = event.target.files[0] || null;
  if (file == null) {
    return;
  }

  printTextarea(
    "js: loading audio: " + file.name + ", size: " + file.size + " bytes"
  );
  printTextarea("js: please wait ...");

  // shared failure path for decoding and rendering errors
  function onDecodeError(e) {
    printTextarea("js: error decoding audio: " + e);
    audio = null;
    setAudio(audio);
  }

  var reader = new FileReader();
  reader.onload = function () {
    context.decodeAudioData(
      reader.result,
      function (audioBuffer) {
        // render the decoded buffer through an offline context
        var offlineContext = new OfflineAudioContext(
          audioBuffer.numberOfChannels,
          audioBuffer.length,
          audioBuffer.sampleRate
        );
        var source = offlineContext.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(offlineContext.destination);
        source.start(0);

        offlineContext
          .startRendering()
          .then(function (renderedBuffer) {
            audio = renderedBuffer.getChannelData(0);
            printTextarea("js: audio loaded, size: " + audio.length);

            // truncate to the first kMaxAudio_s seconds
            if (audio.length > kMaxAudio_s * kSampleRate) {
              audio = audio.slice(0, kMaxAudio_s * kSampleRate);
              printTextarea(
                "js: truncated audio to first " + kMaxAudio_s + " seconds"
              );
            }

            setAudio(audio);
          })
          // a failed rendering must not leave stale audio behind
          .catch(onDecodeError);
      },
      onDecodeError
    );
  };
  reader.onerror = function () {
    printTextarea("js: error reading audio file: " + reader.error);
  };
  reader.readAsArrayBuffer(file);
}
693
+
694
+ //
695
+ // microphone
696
+ //
697
+
698
// recorder of the current / most recent microphone session
var mediaRecorder = null;
// true while a recording should keep running
var doRecording = false;
// Date.now() timestamp taken when the recording was started
var startTime = 0;

// Request the running recording to stop. Only the flag is cleared here;
// startRecording polls it and performs the actual stop.
function stopRecording() {
  doRecording = false;
}
705
+
706
// Record up to kMaxRecording_s seconds of audio from the microphone.
// The doRecording flag is checked every 1000 ms; when it has been cleared
// (see stopRecording) the recorder and the microphone stream are stopped.
// Progress information is updated on every check. When the recorder stops,
// the recording is decoded into the global `audio` sample array.
//
// Each call keeps its own timers, recorder and stream, so that
//   - a denied or still pending microphone request never dereferences null,
//   - the buttons are restored when the recording could not be started,
//   - the time-limit timer of a finished session cannot stop a later one.
function startRecording() {
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    printTextarea(
      "js: error getting audio stream: mediaDevices API is not available"
    );
    return;
  }

  if (!context) {
    // NOTE(review): only `sampleRate` appears to be an AudioContext option;
    // the other keys look like media-track constraints — confirm.
    context = new AudioContext({
      sampleRate: kSampleRate,
      channelCount: 1,
      echoCancellation: false,
      autoGainControl: true,
      noiseSuppression: true,
    });
  }

  var startButton = document.getElementById("start");
  var stopButton = document.getElementById("stop");
  startButton.disabled = true;
  stopButton.disabled = false;

  document.getElementById("progress-bar").style.width = "0%";
  document.getElementById("progress-text").innerHTML = "0%";

  doRecording = true;
  startTime = Date.now();

  var chunks = [];
  var stream = null;
  var recorder = null;
  var interval = null;
  var timeout = null;
  var finished = false;

  // put the start/stop buttons back into their idle state
  function resetButtons() {
    startButton.disabled = false;
    stopButton.disabled = true;
  }

  // release the microphone
  function stopTracks(s) {
    s.getTracks().forEach(function (track) {
      track.stop();
    });
  }

  // shared failure path for decoding and rendering errors
  function onDecodeError(e) {
    printTextarea("js: error decoding audio: " + e);
    audio = null;
    setAudio(audio);
  }

  // decode the recorded blob into the global `audio` sample array
  function decodeRecording(blob) {
    var reader = new FileReader();
    reader.onload = function () {
      context.decodeAudioData(
        reader.result,
        function (audioBuffer) {
          var offlineContext = new OfflineAudioContext(
            audioBuffer.numberOfChannels,
            audioBuffer.length,
            audioBuffer.sampleRate
          );
          var source = offlineContext.createBufferSource();
          source.buffer = audioBuffer;
          source.connect(offlineContext.destination);
          source.start(0);

          offlineContext
            .startRendering()
            .then(function (renderedBuffer) {
              audio = renderedBuffer.getChannelData(0);
              printTextarea("js: audio recorded, size: " + audio.length);

              // truncate to the first kMaxRecording_s seconds
              if (audio.length > kMaxRecording_s * kSampleRate) {
                audio = audio.slice(0, kMaxRecording_s * kSampleRate);
                printTextarea(
                  "js: truncated audio to first " +
                    kMaxRecording_s +
                    " seconds"
                );
              }

              setAudio(audio);
            })
            .catch(onDecodeError);
        },
        onDecodeError
      );
    };
    reader.readAsArrayBuffer(blob);
  }

  // end this session exactly once: cancel its timers, stop the recorder
  // (its onstop handler restores the buttons and decodes the audio) and
  // release the microphone
  function finish() {
    if (finished) {
      return;
    }
    finished = true;
    doRecording = false;
    clearInterval(interval);
    clearTimeout(timeout);

    if (recorder && recorder.state !== "inactive") {
      recorder.stop();
    } else {
      // nothing was recorded, so onstop will never restore the buttons
      resetButtons();
    }
    if (stream) {
      stopTracks(stream);
    }
  }

  navigator.mediaDevices
    .getUserMedia({ audio: true, video: false })
    .then(function (s) {
      stream = s;
      if (finished) {
        // stopped before the microphone became available
        stopTracks(stream);
        return;
      }

      recorder = new MediaRecorder(stream);
      mediaRecorder = recorder;
      recorder.ondataavailable = function (e) {
        chunks.push(e.data);
      };
      recorder.onstop = function () {
        var blob = new Blob(chunks, { type: "audio/ogg; codecs=opus" });
        chunks = [];

        resetButtons();
        decodeRecording(blob);
      };
      recorder.start();
    })
    .catch(function (err) {
      printTextarea("js: error getting audio stream: " + err);
      finish();
    });

  interval = setInterval(function () {
    if (!doRecording) {
      finish();
    }

    // elapsed share of the maximum recording time, capped at 100%
    var percent = Math.min(
      100,
      (100 * (Date.now() - startTime)) / 1000 / kMaxRecording_s
    );
    document.getElementById("progress-bar").style.width = percent + "%";
    document.getElementById("progress-text").innerHTML =
      percent.toFixed(0) + "%";
  }, 1000);

  printTextarea("js: recording ...");

  // enforce the maximum recording time; cancelled by finish()
  timeout = setTimeout(function () {
    if (doRecording) {
      printTextarea(
        "js: recording stopped after " + kMaxRecording_s + " seconds"
      );
      stopRecording();
    }
  }, kMaxRecording_s * 1000);
}
828
+
829
+ //
830
+ // transcribe
831
+ //
832
+
833
+ var nthreads = 8;
834
+
835
// Update the number of threads used for processing.
//   value - new thread count; the range input delivers it as a string, so
//           it is converted to an integer before being stored. Values that
//           are not positive integers leave the current setting unchanged.
// The "threads-value" label is refreshed with the effective setting.
function changeThreads(value) {
  var parsed = parseInt(value, 10);
  if (Number.isInteger(parsed) && parsed > 0) {
    nthreads = parsed;
  }
  document.getElementById("threads-value").innerHTML = nthreads;
}
839
+
840
// Run whisper on the currently loaded audio.
//   translate - passed through to Module.full_default (the "Translate"
//               button passes true, "Transcribe" passes false)
// The whisper instance is created from "whisper.bin" on first use. Nothing
// is processed when the instance cannot be created or no audio is loaded.
function onProcess(translate) {
  // lazily initialize the whisper instance
  if (!instance) {
    instance = Module.init("whisper.bin");

    if (!instance) {
      printTextarea("js: failed to initialize whisper");
      return;
    }

    printTextarea("js: whisper initialized, instance: " + instance);
    document.getElementById("model").innerHTML =
      "Model loaded: " + model_whisper;
  }

  if (!audio) {
    printTextarea("js: no audio data");
    return;
  }

  ["", "js: processing - this might take a while ...", ""].forEach(
    function (line) {
      printTextarea(line);
    }
  );

  // start the processing from a timer, 100 ms after the messages above
  setTimeout(function () {
    var language = document.getElementById("language").value;
    var ret = Module.full_default(
      instance,
      audio,
      language,
      nthreads,
      translate
    );
    console.log("js: full_default returned: " + ret);
    if (ret) {
      printTextarea("js: whisper returned: " + ret);
    }
  }, 100);
}
881
  </script>
882
  <script type="text/javascript" src="main.js"></script>
883
+ </body>
884
+ </html>