Spaces:

radames
/

whisper.cpp-wasm

Running

App Files Files Community

radames commited on Aug 23, 2023

Commit

87ab4d8

1 Parent(s): 534060f

Update index.html

Browse files

Files changed (1) hide show

index.html +822 -523

index.html CHANGED Viewed

@@ -1,585 +1,884 @@
 <!doctype html>
 <html lang="en-us">
-<head>
     <title>whisper.cpp : WASM example</title>
     <style>
-        body {
-            background-color: white;
-        }
-        #output {
-            width: 100%;
-            height: 100%;
-            margin: 0 auto;
-            margin-top: 10px;
-            border-left: 0px;
-            border-right: 0px;
-            padding-left: 0px;
-            padding-right: 0px;
-            display: block;
-            background-color: black;
-            color: white;
-            font-size: 10px;
-            font-family: 'Lucida Console', Monaco, monospace;
-            outline: none;
-            white-space: pre;
-            overflow-wrap: normal;
-            overflow-x: scroll;
-        }
     </style>
-</head>
-<body>
     <div id="main-container">
-        <div id="warning" style="display: none; padding: 1rem;">
-            <b style="color: red ; font-size: large;">Warning: your browser does not support SharedArrayBuffer please
-                try open the page in a new tab.
-            </b> <a href="https://radames-whisper-wasm.hf.space"
-                target="_blank">https://radames-whisper-wasm.hf.space</a>
-        </div>
-        <b>Minimal <a target="_blank" href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a> example running
-            fully in the browser</b>
-        <br><br>
-        Usage instructions:<br>
-        <ul>
-            <li>Load a ggml model file (you can obtain one from <a target="_blank"
-                    href="https://ggml.ggerganov.com/">here</a>, recommended: <b>tiny</b> or <b>base</b>)</li>
-            <li>Select audio file to transcribe or record audio from the microphone (sample: <a target="_blank"
-                    href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)</li>
-            <li>Click on the "Transcribe" button to start the transcription</li>
-        </ul>
-        Note that the computation is quite heavy and may take a few seconds to complete.<br>
-        The transcription results will be displayed in the text area below.<br><br>
-        <b>Important: your browser must support WASM SIMD instructions for this to work.</b>
-        <br><br>
-        <hr>
-        <div id="model">
-            Whisper model: <span id="model-whisper-status"></span>
-            <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
-            <button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button>
-            <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
-            <button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
-            <button id="fetch-whisper-extra" onclick="loadWhisper('extra')">EXTRA</button>
-            <span id="fetch-whisper-progress"></span>
-            <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
-        </div>
-        <br>
-        <!-- radio button to select between file upload or microphone -->
-        <div id="input">
-            Input:
-            <input type="radio" id="file" name="input" value="file" checked="checked" onchange="changeInput('file')" />
-            File
-            <input type="radio" id="mic" name="input" value="mic" onchange="changeInput('mic')" /> Microphone
-        </div>
-        <br>
-        <div id="input_file">
-            Audio file:
-            <input type="file" id="file" name="file" onchange="loadAudio(event)" />
-        </div>
-        <div id="input_mic" style="display: none;">
-            Microphone:
-            <button id="start" onclick="startRecording()">Start</button>
-            <button id="stop" onclick="stopRecording()" disabled>Stop</button>
-            <!-- progress bar to show recording progress -->
-            <br><br>
-            <div id="progress" style="display: none;">
-                <div id="progress-bar" style="width: 0%; height: 10px; background-color: #4CAF50;"></div>
-                <div id="progress-text">0%</div>
-            </div>
-        </div>
-        <audio controls="controls" id="audio" loop hidden>
-            Your browser does not support the &lt;audio&gt; tag.
-            <source id="source" src="" type="audio/wav" />
-        </audio>
-        <hr><br>
-        <table>
-            <tr>
-                <td>
-                    Language:
-                    <select id="language" name="language">
-                        <option value="en">English</option>
-                        <option value="ar">Arabic</option>
-                        <option value="hy">Armenian</option>
-                        <option value="az">Azerbaijani</option>
-                        <option value="eu">Basque</option>
-                        <option value="be">Belarusian</option>
-                        <option value="bn">Bengali</option>
-                        <option value="bg">Bulgarian</option>
-                        <option value="ca">Catalan</option>
-                        <option value="zh">Chinese</option>
-                        <option value="hr">Croatian</option>
-                        <option value="cs">Czech</option>
-                        <option value="da">Danish</option>
-                        <option value="nl">Dutch</option>
-                        <option value="en">English</option>
-                        <option value="et">Estonian</option>
-                        <option value="tl">Filipino</option>
-                        <option value="fi">Finnish</option>
-                        <option value="fr">French</option>
-                        <option value="gl">Galician</option>
-                        <option value="ka">Georgian</option>
-                        <option value="de">German</option>
-                        <option value="el">Greek</option>
-                        <option value="gu">Gujarati</option>
-                        <option value="iw">Hebrew</option>
-                        <option value="hi">Hindi</option>
-                        <option value="hu">Hungarian</option>
-                        <option value="is">Icelandic</option>
-                        <option value="id">Indonesian</option>
-                        <option value="ga">Irish</option>
-                        <option value="it">Italian</option>
-                        <option value="ja">Japanese</option>
-                        <option value="kn">Kannada</option>
-                        <option value="ko">Korean</option>
-                        <option value="la">Latin</option>
-                        <option value="lv">Latvian</option>
-                        <option value="lt">Lithuanian</option>
-                        <option value="mk">Macedonian</option>
-                        <option value="ms">Malay</option>
-                        <option value="mt">Maltese</option>
-                        <option value="no">Norwegian</option>
-                        <option value="fa">Persian</option>
-                        <option value="pl">Polish</option>
-                        <option value="pt">Portuguese</option>
-                        <option value="ro">Romanian</option>
-                        <option value="ru">Russian</option>
-                        <option value="sr">Serbian</option>
-                        <option value="sk">Slovak</option>
-                        <option value="sl">Slovenian</option>
-                        <option value="es">Spanish</option>
-                        <option value="sw">Swahili</option>
-                        <option value="sv">Swedish</option>
-                        <option value="ta">Tamil</option>
-                        <option value="te">Telugu</option>
-                        <option value="th">Thai</option>
-                        <option value="tr">Turkish</option>
-                        <option value="uk">Ukrainian</option>
-                        <option value="ur">Urdu</option>
-                        <option value="vi">Vietnamese</option>
-                        <option value="cy">Welsh</option>
-                        <option value="yi">Yiddish</option>
-                    </select>
-                </td>
-                <td>
-                    <button onclick="onProcess(false);">Transcribe</button>
-                </td>
-                <td>
-                    <button onclick="onProcess(true);">Translate</button>
-                </td>
-            </tr>
-        </table>
-        <br>
-        <!-- textarea with height filling the rest of the page -->
-        <textarea id="output" rows="20"></textarea>
-        <br><br>
-        <div class="cell-version">
-            <span>
-                |
-                Build time: <span class="nav-link">Mon Jan 16 11:57:35 2023</span> |
-                Commit hash: <a class="nav-link" target="_blank"
-                    href="https://github.com/ggerganov/whisper.cpp/commit/49b529ba">49b529ba</a> |
-                Commit subject: <span class="nav-link">whisper.android : add support for loading directly from asset in
-                    C (#415)</span> |
-                <a class="nav-link" target="_blank"
-                    href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm">Source Code</a> |
-            </span>
         </div>
     </div>
     <script type="text/javascript" src="helpers.js"></script>
-    <script type='text/javascript'>
-        document.addEventListener("DOMContentLoaded", () => {
-            //check is shared array buffer is supported
-            if (!window.SharedArrayBuffer) {
-                document.querySelector("#warning").style.display = "block";
-            }
-        })
-        // TODO: convert audio buffer to WAV
-        function setAudio(audio) {
-            //if (audio) {
-            //    // convert to 16-bit PCM
-            //    var blob = new Blob([audio], { type: 'audio/wav' });
-            //    var url = URL.createObjectURL(blob);
-            //    document.getElementById('source').src = url;
-            //    document.getElementById('audio').hidden = false;
-            //    document.getElementById('audio').loop = false;
-            //    document.getElementById('audio').load();
-            //} else {
-            //    document.getElementById('audio').hidden = true;
-            //}
         }
-        function changeInput(input) {
-            if (input == 'file') {
-                document.getElementById('input_file').style.display = 'block';
-                document.getElementById('input_mic').style.display = 'none';
-                document.getElementById('progress').style.display = 'none';
-            } else {
-                document.getElementById('input_file').style.display = 'none';
-                document.getElementById('input_mic').style.display = 'block';
-                document.getElementById('progress').style.display = 'block';
-            }
         }
-        var Module = {
-            print: printTextarea,
-            printErr: printTextarea,
-            setStatus: function (text) {
-                printTextarea('js: ' + text);
-            },
-            monitorRunDependencies: function (left) {
-            }
-        };
-        // web audio context
-        var context = null;
-        // audio data
-        var audio = null;
-        // the whisper instance
-        var instance = null;
-        var model_whisper = '';
-        // helper function
-        function convertTypedArray(src, type) {
-            var buffer = new ArrayBuffer(src.byteLength);
-            var baseView = new src.constructor(buffer).set(src);
-            return new type(buffer);
         }
-        //
-        // load model
-        //
-        let dbVersion = 1
-        let dbName = 'whisper.ggerganov.com';
-        let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
-        function storeFS(fname, buf) {
-            // write to WASM file using FS_createDataFile
-            // if the file exists, delete it
-            try {
-                Module.FS_unlink(fname);
-            } catch (e) {
-                // ignore
-            }
-            Module.FS_createDataFile("/", fname, buf, true, true);
-            model_whisper = fname;
-            document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
-            printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
         }
-        function loadFile(event, fname) {
-            var file = event.target.files[0] || null;
-            if (file == null) {
-                return;
-            }
-            printTextarea("loadFile: loading model: " + file.name + ", size: " + file.size + " bytes");
-            printTextarea('loadFile: please wait ...');
-            var reader = new FileReader();
-            reader.onload = function (event) {
-                var buf = new Uint8Array(reader.result);
-                storeFS(fname, buf);
             }
-            reader.readAsArrayBuffer(file);
-            document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-            document.getElementById('fetch-whisper-base-en').style.display = 'none';
-            document.getElementById('fetch-whisper-tiny').style.display = 'none';
-            document.getElementById('fetch-whisper-base').style.display = 'none';
-            document.getElementById('whisper-file').style.display = 'none';
-            document.getElementById('model-whisper-status').innerHTML = 'loaded model: ' + file.name;
         }
-        function loadWhisper(model) {
-            let urls = {
-                'tiny.en': 'models/ggml-tiny.en.bin',
-                'tiny': 'models/ggml-tiny.bin',
-                'base.en': 'models/ggml-base.en.bin',
-                'base': 'models/ggml-base.bin',
-                'extra': 'https://huggingface.co/radames/general/resolve/main/ggml-tiny.bin'
-            };
-            let sizes = {
-                'tiny.en': 75,
-                'tiny': 75,
-                'base.en': 142,
-                'base': 142,
-            };
-            let url = urls[model];
-            let dst = 'whisper.bin';
-            let size_mb = sizes[model];
-            model_whisper = model;
-            document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-            document.getElementById('fetch-whisper-base-en').style.display = 'none';
-            document.getElementById('fetch-whisper-tiny').style.display = 'none';
-            document.getElementById('fetch-whisper-base').style.display = 'none';
-            document.getElementById('whisper-file').style.display = 'none';
-            document.getElementById('model-whisper-status').innerHTML = 'loading model: ' + model;
-            cbProgress = function (p) {
-                let el = document.getElementById('fetch-whisper-progress');
-                el.innerHTML = Math.round(100 * p) + '%';
-            };
-            cbCancel = function () {
-                var el;
-                el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
-                el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
-                el = document.getElementById('fetch-whisper-tiny'); if (el) el.style.display = 'inline-block';
-                el = document.getElementById('fetch-whisper-base'); if (el) el.style.display = 'inline-block';
-                el = document.getElementById('whisper-file'); if (el) el.style.display = 'inline-block';
-                el = document.getElementById('model-whisper-status'); if (el) el.innerHTML = '';
             };
-            loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
-        }
-        //
-        // audio file
-        //
-        const kMaxAudio_s = 120;
-        const kSampleRate = 16000;
-        window.AudioContext = window.AudioContext || window.webkitAudioContext;
-        window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
-        function loadAudio(event) {
-            if (!context) {
-                context = new AudioContext({
-                    sampleRate: kSampleRate,
-                    channelCount: 1,
-                    echoCancellation: false,
-                    autoGainControl: true,
-                    noiseSuppression: true,
-                });
-            }
-            var file = event.target.files[0] || null;
-            if (file == null) {
-                return;
-            }
-            printTextarea('js: loading audio: ' + file.name + ', size: ' + file.size + ' bytes');
-            printTextarea('js: please wait ...');
-            var reader = new FileReader();
-            reader.onload = function (event) {
                 var buf = new Uint8Array(reader.result);
-                context.decodeAudioData(buf.buffer, function (audioBuffer) {
-                    var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
                     var source = offlineContext.createBufferSource();
                     source.buffer = audioBuffer;
                     source.connect(offlineContext.destination);
                     source.start(0);
-                    offlineContext.startRendering().then(function (renderedBuffer) {
                         audio = renderedBuffer.getChannelData(0);
-                        printTextarea('js: audio loaded, size: ' + audio.length);
                         // truncate to first 30 seconds
-                        if (audio.length > kMaxAudio_s * kSampleRate) {
-                            audio = audio.slice(0, kMaxAudio_s * kSampleRate);
-                            printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
                         }
                         setAudio(audio);
-                    });
-                }, function (e) {
-                    printTextarea('js: error decoding audio: ' + e);
                     audio = null;
                     setAudio(audio);
-                });
-            }
-            reader.readAsArrayBuffer(file);
-        }
-        //
-        // microphone
-        //
-        var mediaRecorder = null;
-        var doRecording = false;
-        var startTime = 0;
-        function stopRecording() {
-            doRecording = false;
         }
-        // record up to kMaxAudio_s seconds of audio from the microphone
-        // check if doRecording is false every 1000 ms and stop recording if so
-        // update progress information
-        function startRecording() {
-            if (!context) {
-                context = new AudioContext({
-                    sampleRate: kSampleRate,
-                    channelCount: 1,
-                    echoCancellation: false,
-                    autoGainControl: true,
-                    noiseSuppression: true,
-                });
-            }
-            document.getElementById('start').disabled = true;
-            document.getElementById('stop').disabled = false;
-            document.getElementById('progress-bar').style.width = '0%';
-            document.getElementById('progress-text').innerHTML = '0%';
-            doRecording = true;
-            startTime = Date.now();
-            var chunks = [];
-            var stream = null;
-            navigator.mediaDevices.getUserMedia({ audio: true, video: false })
-                .then(function (s) {
-                    stream = s;
-                    mediaRecorder = new MediaRecorder(stream);
-                    mediaRecorder.ondataavailable = function (e) {
-                        chunks.push(e.data);
-                    };
-                    mediaRecorder.onstop = function (e) {
-                        var blob = new Blob(chunks, { 'type': 'audio/ogg; codecs=opus' });
-                        chunks = [];
-                        document.getElementById('start').disabled = false;
-                        document.getElementById('stop').disabled = true;
-                        var reader = new FileReader();
-                        reader.onload = function (event) {
-                            var buf = new Uint8Array(reader.result);
-                            context.decodeAudioData(buf.buffer, function (audioBuffer) {
-                                var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
-                                var source = offlineContext.createBufferSource();
-                                source.buffer = audioBuffer;
-                                source.connect(offlineContext.destination);
-                                source.start(0);
-                                offlineContext.startRendering().then(function (renderedBuffer) {
-                                    audio = renderedBuffer.getChannelData(0);
-                                    printTextarea('js: audio recorded, size: ' + audio.length);
-                                    // truncate to first 30 seconds
-                                    if (audio.length > kMaxAudio_s * kSampleRate) {
-                                        audio = audio.slice(0, kMaxAudio_s * kSampleRate);
-                                        printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
-                                    }
-                                    setAudio(audio);
-                                });
-                            }, function (e) {
-                                printTextarea('js: error decoding audio: ' + e);
-                                audio = null;
-                                setAudio(audio);
-                            });
-                        }
-                        reader.readAsArrayBuffer(blob);
-                    };
-                    mediaRecorder.start();
-                })
-                .catch(function (err) {
-                    printTextarea('js: error getting audio stream: ' + err);
-                });
-            var interval = setInterval(function () {
-                if (!doRecording) {
-                    clearInterval(interval);
-                    mediaRecorder.stop();
-                    stream.getTracks().forEach(function (track) {
-                        track.stop();
-                    });
-                }
-                document.getElementById('progress-bar').style.width = (100 * (Date.now() - startTime) / 1000 / kMaxAudio_s) + '%';
-                document.getElementById('progress-text').innerHTML = (100 * (Date.now() - startTime) / 1000 / kMaxAudio_s).toFixed(0) + '%';
-            }, 1000);
-            printTextarea('js: recording ...');
-            setTimeout(function () {
-                if (doRecording) {
-                    printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
-                    stopRecording();
-                }
-            }, kMaxAudio_s * 1000);
         }
-        //
-        // transcribe
-        //
-        function onProcess(translate) {
-            if (!instance) {
-                instance = Module.init('whisper.bin');
-                if (instance) {
-                    printTextarea("js: whisper initialized, instance: " + instance);
-                    document.getElementById('model').innerHTML = 'Model loaded: ' + model_whisper;
-                }
-            }
-            if (!instance) {
-                printTextarea("js: failed to initialize whisper");
-                return;
-            }
-            if (!audio) {
-                printTextarea("js: no audio data");
-                return;
-            }
-            if (instance) {
-                printTextarea('');
-                printTextarea('js: processing - this might take a while ...');
-                printTextarea('');
-                setTimeout(function () {
-                    var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate);
-                    console.log('js: full_default returned: ' + ret);
-                    if (ret) {
-                        printTextarea("js: whisper returned: " + ret);
-                    }
-                }, 100);
             }
         }
     </script>
     <script type="text/javascript" src="main.js"></script>
-</body>
-</html>

 <!doctype html>
 <html lang="en-us">
+  <head>
     <title>whisper.cpp : WASM example</title>
     <style>
+      #output {
+        width: 100%;
+        height: 100%;
+        margin: 0 auto;
+        margin-top: 10px;
+        border-left: 0px;
+        border-right: 0px;
+        padding-left: 0px;
+        padding-right: 0px;
+        display: block;
+        background-color: black;
+        color: white;
+        font-size: 10px;
+        font-family: "Lucida Console", Monaco, monospace;
+        outline: none;
+        white-space: pre;
+        overflow-wrap: normal;
+        overflow-x: scroll;
+      }
     </style>
+  </head>
+  <body>
     <div id="main-container">
+      <div id="warning" style="display: none; padding: 1rem">
+        <b style="color: red; font-size: large"
+          >Warning: your browser does not support SharedArrayBuffer please try
+          open the page in a new tab.
+        </b>
+        <a href="https://radames-whisper-wasm.hf.space" target="_blank"
+          >https://radames-whisper-wasm.hf.space</a
+        >
+      </div>
+      <b
+        >Minimal
+        <a href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a>
+        example running fully in the browser</b
+      >
+      <br /><br />
+      Usage instructions:<br />
+      <ul>
+        <li>
+          Load a ggml model file (you can obtain one from
+          <a href="https://ggml.ggerganov.com/">here</a>, recommended:
+          <b>tiny</b> or <b>base</b>)
+        </li>
+        <li>
+          Select audio file to transcribe or record audio from the microphone
+          (sample: <a href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)
+        </li>
+        <li>Click on the "Transcribe" button to start the transcription</li>
+      </ul>
+      Note that the computation is quite heavy and may take a few seconds to
+      complete.<br />
+      The transcription results will be displayed in the text area below.<br /><br />
+      <b>Important:</b>
+      <ul>
+        <li>
+          your browser must support WASM SIMD instructions for this to work
+        </li>
+        <li>
+          Firefox cannot load files larger than 256 MB - use Chrome instead
+        </li>
+      </ul>
+      <b>More examples:</b>
+      <a href="https://whisper.ggerganov.com/">main</a> |
+      <a href="https://whisper.ggerganov.com/bench">bench</a> |
+      <a href="https://whisper.ggerganov.com/stream">stream</a> |
+      <a href="https://whisper.ggerganov.com/command">command</a> |
+      <a href="https://whisper.ggerganov.com/talk">talk</a> |
+      <hr />
+      <div id="model">
+        Whisper models: <span id="model-whisper-status"></span><br /><br />
+        <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">
+          tiny.en (75 MB)
+        </button>
+        <button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">
+          tiny (75 MB)
+        </button>
+        <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">
+          base.en (142 MB)
+        </button>
+        <button id="fetch-whisper-base" onclick="loadWhisper('base')">
+          base (142 MB)
+        </button>
+        <button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">
+          small.en (466 MB)
+        </button>
+        <button id="fetch-whisper-small" onclick="loadWhisper('small')">
+          small (466 MB)
+        </button>
+        <input
+          type="file"
+          id="whisper-file"
+          name="file"
+          onchange="loadFile(event, 'whisper.bin')"
+        />
+        <br /><br />
+        Quantized models:<br /><br />
+        <button
+          id="fetch-whisper-tiny-en-q5_1"
+          onclick="loadWhisper('tiny-en-q5_1')"
+        >
+          tiny.en (Q5_1, 31 MB)
+        </button>
+        <button id="fetch-whisper-tiny-q5_1" onclick="loadWhisper('tiny-q5_1')">
+          tiny (Q5_1, 31 MB)
+        </button>
+        <button
+          id="fetch-whisper-base-en-q5_1"
+          onclick="loadWhisper('base-en-q5_1')"
+        >
+          base.en (Q5_1, 57 MB)
+        </button>
+        <button id="fetch-whisper-base-q5_1" onclick="loadWhisper('base-q5_1')">
+          base (Q5_1, 57 MB)
+        </button>
+        <button
+          id="fetch-whisper-small-en-q5_1"
+          onclick="loadWhisper('small-en-q5_1')"
+        >
+          small.en (Q5_1, 182 MB)
+        </button>
+        <button
+          id="fetch-whisper-small-q5_1"
+          onclick="loadWhisper('small-q5_1')"
+        >
+          small (Q5_1, 182 MB)</button
+        ><br />
+        <button
+          id="fetch-whisper-medium-en-q5_0"
+          onclick="loadWhisper('medium-en-q5_0')"
+        >
+          medium.en (Q5_0, 515 MB)
+        </button>
+        <button
+          id="fetch-whisper-medium-q5_0"
+          onclick="loadWhisper('medium-q5_0')"
+        >
+          medium (Q5_0, 515 MB)
+        </button>
+        <button
+          id="fetch-whisper-large-q5_0"
+          onclick="loadWhisper('large-q5_0')"
+        >
+          large (Q5_0, 1030 MB)
+        </button>
+        <span id="fetch-whisper-progress"></span>
+      </div>
+      <br />
+      <!-- radio button to select between file upload or microphone -->
+      <div id="input">
+        Input:
+        <input
+          type="radio"
+          id="file"
+          name="input"
+          value="file"
+          checked="checked"
+          onchange="changeInput('file')"
+        />
+        <label for="file">File</label>
+        <input
+          type="radio"
+          id="mic"
+          name="input"
+          value="mic"
+          onchange="changeInput('mic')"
+        />
+        <label for="mic">Microphone</label>
+      </div>
+      <br />
+      <div id="input_file">
+        Audio file:
+        <input type="file" id="file" name="file" onchange="loadAudio(event)" />
+      </div>
+      <div id="input_mic" style="display: none">
+        Microphone:
+        <button id="start" onclick="startRecording()">Start</button>
+        <button id="stop" onclick="stopRecording()" disabled>Stop</button>
+        <!-- progress bar to show recording progress -->
+        <br /><br />
+        <div id="progress" style="display: none">
+          <div
+            id="progress-bar"
+            style="width: 0%; height: 10px; background-color: #4caf50"
+          ></div>
+          <div id="progress-text">0%</div>
         </div>
+      </div>
+      <audio controls="controls" id="audio" loop hidden>
+        Your browser does not support the &lt;audio&gt; tag.
+        <source id="source" src="" type="audio/wav" />
+      </audio>
+      <hr />
+      <br />
+      <table>
+        <tr>
+          <td>
+            Language:
+            <select id="language" name="language">
+              <option value="en">English</option>
+              <option value="ar">Arabic</option>
+              <option value="hy">Armenian</option>
+              <option value="az">Azerbaijani</option>
+              <option value="eu">Basque</option>
+              <option value="be">Belarusian</option>
+              <option value="bn">Bengali</option>
+              <option value="bg">Bulgarian</option>
+              <option value="ca">Catalan</option>
+              <option value="zh">Chinese</option>
+              <option value="hr">Croatian</option>
+              <option value="cs">Czech</option>
+              <option value="da">Danish</option>
+              <option value="nl">Dutch</option>
+              <option value="en">English</option>
+              <option value="et">Estonian</option>
+              <option value="tl">Filipino</option>
+              <option value="fi">Finnish</option>
+              <option value="fr">French</option>
+              <option value="gl">Galician</option>
+              <option value="ka">Georgian</option>
+              <option value="de">German</option>
+              <option value="el">Greek</option>
+              <option value="gu">Gujarati</option>
+              <option value="iw">Hebrew</option>
+              <option value="hi">Hindi</option>
+              <option value="hu">Hungarian</option>
+              <option value="is">Icelandic</option>
+              <option value="id">Indonesian</option>
+              <option value="ga">Irish</option>
+              <option value="it">Italian</option>
+              <option value="ja">Japanese</option>
+              <option value="kn">Kannada</option>
+              <option value="ko">Korean</option>
+              <option value="la">Latin</option>
+              <option value="lv">Latvian</option>
+              <option value="lt">Lithuanian</option>
+              <option value="mk">Macedonian</option>
+              <option value="ms">Malay</option>
+              <option value="mt">Maltese</option>
+              <option value="no">Norwegian</option>
+              <option value="fa">Persian</option>
+              <option value="pl">Polish</option>
+              <option value="pt">Portuguese</option>
+              <option value="ro">Romanian</option>
+              <option value="ru">Russian</option>
+              <option value="sr">Serbian</option>
+              <option value="sk">Slovak</option>
+              <option value="sl">Slovenian</option>
+              <option value="es">Spanish</option>
+              <option value="sw">Swahili</option>
+              <option value="sv">Swedish</option>
+              <option value="ta">Tamil</option>
+              <option value="te">Telugu</option>
+              <option value="th">Thai</option>
+              <option value="tr">Turkish</option>
+              <option value="uk">Ukrainian</option>
+              <option value="ur">Urdu</option>
+              <option value="vi">Vietnamese</option>
+              <option value="cy">Welsh</option>
+              <option value="yi">Yiddish</option>
+            </select>
+          </td>
+          <!-- Slider to select number of threads between 1 and 16 -->
+          <td>
+            Threads:
+            <input
+              type="range"
+              id="threads"
+              name="threads"
+              min="1"
+              max="16"
+              value="8"
+              onchange="changeThreads(this.value)"
+            />
+            <span id="threads-value">8</span>
+          </td>
+          <td>
+            <button onclick="onProcess(false);">Transcribe</button>
+          </td>
+          <td>
+            <button onclick="onProcess(true);">Translate</button>
+          </td>
+        </tr>
+      </table>
+      <br />
+      <!-- textarea with height filling the rest of the page -->
+      <textarea id="output" rows="20"></textarea>
+      <br /><br />
+      <div class="cell-version">
+        <span>
+          | Build time: <span class="nav-link">@GIT_DATE@</span> | Commit hash:
+          <a
+            class="nav-link"
+            href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@"
+            >@GIT_SHA1@</a
+          >
+          | Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
+          <a
+            class="nav-link"
+            href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm"
+            >Source Code</a
+          >
+          |
+        </span>
+      </div>
     </div>
     <script type="text/javascript" src="helpers.js"></script>
+    <script type="text/javascript">
+      document.addEventListener("DOMContentLoaded", () => {
+        //check is shared array buffer is supported
+        if (!window.SharedArrayBuffer) {
+          document.querySelector("#warning").style.display = "block";
         }
+      });
+      // TODO: convert audio buffer to WAV
+      function setAudio(audio) {
+        //if (audio) {
+        //    // convert to 16-bit PCM
+        //    var blob = new Blob([audio], { type: 'audio/wav' });
+        //    var url = URL.createObjectURL(blob);
+        //    document.getElementById('source').src = url;
+        //    document.getElementById('audio').hidden = false;
+        //    document.getElementById('audio').loop = false;
+        //    document.getElementById('audio').load();
+        //} else {
+        //    document.getElementById('audio').hidden = true;
+        //}
+      }
+      function changeInput(input) {
+        if (input == "file") {
+          document.getElementById("input_file").style.display = "block";
+          document.getElementById("input_mic").style.display = "none";
+          document.getElementById("progress").style.display = "none";
+        } else {
+          document.getElementById("input_file").style.display = "none";
+          document.getElementById("input_mic").style.display = "block";
+          document.getElementById("progress").style.display = "block";
+        }
+      }
+      var Module = {
+        print: printTextarea,
+        printErr: printTextarea,
+        setStatus: function (text) {
+          printTextarea("js: " + text);
+        },
+        monitorRunDependencies: function (left) {},
+      };
+      // web audio context
+      var context = null;
+      // audio data
+      var audio = null;
+      // the whisper instance
+      var instance = null;
+      var model_whisper = "";
+      // helper function
+      function convertTypedArray(src, type) {
+        var buffer = new ArrayBuffer(src.byteLength);
+        var baseView = new src.constructor(buffer).set(src);
+        return new type(buffer);
+      }
+      //
+      // load model
+      //
+      let dbVersion = 1;
+      let dbName = "whisper.ggerganov.com";
+      let indexedDB =
+        window.indexedDB ||
+        window.mozIndexedDB ||
+        window.webkitIndexedDB ||
+        window.msIndexedDB;
+      function storeFS(fname, buf) {
+        // write to WASM file using FS_createDataFile
+        // if the file exists, delete it
+        try {
+          Module.FS_unlink(fname);
+        } catch (e) {
+          // ignore
         }
+        Module.FS_createDataFile("/", fname, buf, true, true);
+        //model_whisper = fname;
+        document.getElementById("model-whisper-status").innerHTML =
+          'loaded "' + model_whisper + '"!';
+        printTextarea(
+          "storeFS: stored model: " + fname + " size: " + buf.length
+        );
+        document.getElementById("model").innerHTML =
+          "Model fetched: " + model_whisper;
+      }
+      function loadFile(event, fname) {
+        var file = event.target.files[0] || null;
+        if (file == null) {
+          return;
         }
+        printTextarea(
+          "loadFile: loading model: " +
+            file.name +
+            ", size: " +
+            file.size +
+            " bytes"
+        );
+        printTextarea("loadFile: please wait ...");
+        var reader = new FileReader();
+        reader.onload = function (event) {
+          var buf = new Uint8Array(reader.result);
+          storeFS(fname, buf);
+        };
+        reader.readAsArrayBuffer(file);
+        document.getElementById("fetch-whisper-tiny-en").style.display = "none";
+        document.getElementById("fetch-whisper-base-en").style.display = "none";
+        document.getElementById("fetch-whisper-small-en").style.display =
+          "none";
+        document.getElementById("fetch-whisper-tiny").style.display = "none";
+        document.getElementById("fetch-whisper-base").style.display = "none";
+        document.getElementById("fetch-whisper-small").style.display = "none";
+        document.getElementById("fetch-whisper-tiny-en-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-tiny-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-base-en-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-base-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-small-en-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-small-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-medium-en-q5_0").style.display =
+          "none";
+        document.getElementById("fetch-whisper-medium-q5_0").style.display =
+          "none";
+        document.getElementById("fetch-whisper-large-q5_0").style.display =
+          "none";
+        document.getElementById("whisper-file").style.display = "none";
+        document.getElementById("model-whisper-status").innerHTML =
+          "loaded model: " + file.name;
+      }
+      function loadWhisper(model) {
+        const baseURL = "https://huggingface.co/ggerganov/whisper.cpp/resolve";
+        let urls = {
+          "tiny.en": `${baseURL}/ggml-model-whisper-tiny.en.bin`,
+          tiny: `${baseURL}/ggml-model-whisper-tiny.bin`,
+          "base.en": `${baseURL}/ggml-model-whisper-base.en.bin`,
+          base: `${baseURL}/ggml-model-whisper-base.bin`,
+          "small.en": `${baseURL}/ggml-model-whisper-small.en.bin`,
+          small: `${baseURL}/ggml-model-whisper-small.bin`,
+          "tiny-en-q5_1": `${baseURL}/ggml-model-whisper-tiny.en-q5_1.bin`,
+          "tiny-q5_1": `${baseURL}/ggml-model-whisper-tiny-q5_1.bin`,
+          "base-en-q5_1": `${baseURL}/ggml-model-whisper-base.en-q5_1.bin`,
+          "base-q5_1": `${baseURL}/ggml-model-whisper-base-q5_1.bin`,
+          "small-en-q5_1": `${baseURL}/ggml-model-whisper-small.en-q5_1.bin`,
+          "small-q5_1": `${baseURL}/ggml-model-whisper-small-q5_1.bin`,
+          "medium-en-q5_0": `${baseURL}/ggml-model-whisper-medium.en-q5_0.bin`,
+          "medium-q5_0": `${baseURL}/ggml-model-whisper-medium-q5_0.bin`,
+          "large-q5_0": `${baseURL}/ggml-model-whisper-large-q5_0.bin`,
+        };
+        let sizes = {
+          "tiny.en": 75,
+          tiny: 75,
+          "base.en": 142,
+          base: 142,
+          "small.en": 466,
+          small: 466,
+          "tiny-en-q5_1": 31,
+          "tiny-q5_1": 31,
+          "base-en-q5_1": 57,
+          "base-q5_1": 57,
+          "small-en-q5_1": 182,
+          "small-q5_1": 182,
+          "medium-en-q5_0": 515,
+          "medium-q5_0": 515,
+          "large-q5_0": 1030,
+        };
+        let url = urls[model];
+        let dst = "whisper.bin";
+        let size_mb = sizes[model];
+        model_whisper = model;
+        document.getElementById("fetch-whisper-tiny-en").style.display = "none";
+        document.getElementById("fetch-whisper-base-en").style.display = "none";
+        document.getElementById("fetch-whisper-small-en").style.display =
+          "none";
+        document.getElementById("fetch-whisper-tiny").style.display = "none";
+        document.getElementById("fetch-whisper-base").style.display = "none";
+        document.getElementById("fetch-whisper-small").style.display = "none";
+        document.getElementById("fetch-whisper-tiny-en-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-tiny-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-base-en-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-base-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-small-en-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-small-q5_1").style.display =
+          "none";
+        document.getElementById("fetch-whisper-medium-en-q5_0").style.display =
+          "none";
+        document.getElementById("fetch-whisper-medium-q5_0").style.display =
+          "none";
+        document.getElementById("fetch-whisper-large-q5_0").style.display =
+          "none";
+        document.getElementById("whisper-file").style.display = "none";
+        document.getElementById("model-whisper-status").innerHTML =
+          "loading model: " + model;
+        cbProgress = function (p) {
+          let el = document.getElementById("fetch-whisper-progress");
+          el.innerHTML = Math.round(100 * p) + "%";
+        };
+        cbCancel = function () {
+          var el;
+          el = document.getElementById("fetch-whisper-tiny-en");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-base-en");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-small-en");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-tiny");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-base");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-small");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-tiny-en-q5_1");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-tiny-q5_1");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-base-en-q5_1");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-base-q5_1");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-small-en-q5_1");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-small-q5_1");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-medium-en-q5_0");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-medium-q5_0");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("fetch-whisper-large-q5_0");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("whisper-file");
+          if (el) el.style.display = "inline-block";
+          el = document.getElementById("model-whisper-status");
+          if (el) el.innerHTML = "";
+        };
+        loadRemote(
+          url,
+          dst,
+          size_mb,
+          cbProgress,
+          storeFS,
+          cbCancel,
+          printTextarea
+        );
+      }
+      //
+      // audio file
+      //
+      const kMaxAudio_s = 30 * 60;
+      const kMaxRecording_s = 2 * 60;
+      const kSampleRate = 16000;
+      window.AudioContext = window.AudioContext || window.webkitAudioContext;
+      window.OfflineAudioContext =
+        window.OfflineAudioContext || window.webkitOfflineAudioContext;
+      function loadAudio(event) {
+        if (!context) {
+          context = new AudioContext({
+            sampleRate: kSampleRate,
+            channelCount: 1,
+            echoCancellation: false,
+            autoGainControl: true,
+            noiseSuppression: true,
+          });
         }
+        var file = event.target.files[0] || null;
+        if (file == null) {
+          return;
+        }
+        printTextarea(
+          "js: loading audio: " + file.name + ", size: " + file.size + " bytes"
+        );
+        printTextarea("js: please wait ...");
+        var reader = new FileReader();
+        reader.onload = function (event) {
+          var buf = new Uint8Array(reader.result);
+          context.decodeAudioData(
+            buf.buffer,
+            function (audioBuffer) {
+              var offlineContext = new OfflineAudioContext(
+                audioBuffer.numberOfChannels,
+                audioBuffer.length,
+                audioBuffer.sampleRate
+              );
+              var source = offlineContext.createBufferSource();
+              source.buffer = audioBuffer;
+              source.connect(offlineContext.destination);
+              source.start(0);
+              offlineContext.startRendering().then(function (renderedBuffer) {
+                audio = renderedBuffer.getChannelData(0);
+                printTextarea("js: audio loaded, size: " + audio.length);
+                // truncate to first 30 seconds
+                if (audio.length > kMaxAudio_s * kSampleRate) {
+                  audio = audio.slice(0, kMaxAudio_s * kSampleRate);
+                  printTextarea(
+                    "js: truncated audio to first " + kMaxAudio_s + " seconds"
+                  );
+                }
+                setAudio(audio);
+              });
+            },
+            function (e) {
+              printTextarea("js: error decoding audio: " + e);
+              audio = null;
+              setAudio(audio);
             }
+          );
+        };
+        reader.readAsArrayBuffer(file);
+      }
+      //
+      // microphone
+      //
+      var mediaRecorder = null;
+      var doRecording = false;
+      var startTime = 0;
+      function stopRecording() {
+        doRecording = false;
+      }
+      // record up to kMaxRecording_s seconds of audio from the microphone
+      // check if doRecording is false every 1000 ms and stop recording if so
+      // update progress information
+      function startRecording() {
+        if (!context) {
+          context = new AudioContext({
+            sampleRate: kSampleRate,
+            channelCount: 1,
+            echoCancellation: false,
+            autoGainControl: true,
+            noiseSuppression: true,
+          });
         }
+        document.getElementById("start").disabled = true;
+        document.getElementById("stop").disabled = false;
+        document.getElementById("progress-bar").style.width = "0%";
+        document.getElementById("progress-text").innerHTML = "0%";
+        doRecording = true;
+        startTime = Date.now();
+        var chunks = [];
+        var stream = null;
+        navigator.mediaDevices
+          .getUserMedia({ audio: true, video: false })
+          .then(function (s) {
+            stream = s;
+            mediaRecorder = new MediaRecorder(stream);
+            mediaRecorder.ondataavailable = function (e) {
+              chunks.push(e.data);
             };
+            mediaRecorder.onstop = function (e) {
+              var blob = new Blob(chunks, { type: "audio/ogg; codecs=opus" });
+              chunks = [];
+              document.getElementById("start").disabled = false;
+              document.getElementById("stop").disabled = true;
+              var reader = new FileReader();
+              reader.onload = function (event) {
                 var buf = new Uint8Array(reader.result);
+                context.decodeAudioData(
+                  buf.buffer,
+                  function (audioBuffer) {
+                    var offlineContext = new OfflineAudioContext(
+                      audioBuffer.numberOfChannels,
+                      audioBuffer.length,
+                      audioBuffer.sampleRate
+                    );
                     var source = offlineContext.createBufferSource();
                     source.buffer = audioBuffer;
                     source.connect(offlineContext.destination);
                     source.start(0);
+                    offlineContext
+                      .startRendering()
+                      .then(function (renderedBuffer) {
                         audio = renderedBuffer.getChannelData(0);
+                        printTextarea(
+                          "js: audio recorded, size: " + audio.length
+                        );
                         // truncate to first 30 seconds
+                        if (audio.length > kMaxRecording_s * kSampleRate) {
+                          audio = audio.slice(0, kMaxRecording_s * kSampleRate);
+                          printTextarea(
+                            "js: truncated audio to first " +
+                              kMaxRecording_s +
+                              " seconds"
+                          );
                         }
                         setAudio(audio);
+                      });
+                  },
+                  function (e) {
+                    printTextarea("js: error decoding audio: " + e);
                     audio = null;
                     setAudio(audio);
+                  }
+                );
+              };
+              reader.readAsArrayBuffer(blob);
+            };
+            mediaRecorder.start();
+          })
+          .catch(function (err) {
+            printTextarea("js: error getting audio stream: " + err);
+          });
+        var interval = setInterval(function () {
+          if (!doRecording) {
+            clearInterval(interval);
+            mediaRecorder.stop();
+            stream.getTracks().forEach(function (track) {
+              track.stop();
+            });
+          }
+          document.getElementById("progress-bar").style.width =
+            (100 * (Date.now() - startTime)) / 1000 / kMaxRecording_s + "%";
+          document.getElementById("progress-text").innerHTML =
+            ((100 * (Date.now() - startTime)) / 1000 / kMaxRecording_s).toFixed(
+              0
+            ) + "%";
+        }, 1000);
+        printTextarea("js: recording ...");
+        setTimeout(function () {
+          if (doRecording) {
+            printTextarea(
+              "js: recording stopped after " + kMaxRecording_s + " seconds"
+            );
+            stopRecording();
+          }
+        }, kMaxRecording_s * 1000);
+      }
+      //
+      // transcribe
+      //
+      var nthreads = 8;
+      function changeThreads(value) {
+        nthreads = value;
+        document.getElementById("threads-value").innerHTML = nthreads;
+      }
+      function onProcess(translate) {
+        if (!instance) {
+          instance = Module.init("whisper.bin");
+          if (instance) {
+            printTextarea("js: whisper initialized, instance: " + instance);
+            document.getElementById("model").innerHTML =
+              "Model loaded: " + model_whisper;
+          }
         }
+        if (!instance) {
+          printTextarea("js: failed to initialize whisper");
+          return;
         }
+        if (!audio) {
+          printTextarea("js: no audio data");
+          return;
+        }
+        if (instance) {
+          printTextarea("");
+          printTextarea("js: processing - this might take a while ...");
+          printTextarea("");
+          setTimeout(function () {
+            var ret = Module.full_default(
+              instance,
+              audio,
+              document.getElementById("language").value,
+              nthreads,
+              translate
+            );
+            console.log("js: full_default returned: " + ret);
+            if (ret) {
+              printTextarea("js: whisper returned: " + ret);
             }
+          }, 100);
         }
+      }
     </script>
     <script type="text/javascript" src="main.js"></script>
+  </body>
+</html>