whisper.cpp-wasm / index.html
radames's picture
Update index.html
d05e7af
<!doctype html>
<html lang="en-us">
<head>
<title>whisper.cpp : WASM example</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script>
<style>
#output {
width: 100%;
height: 100%;
margin: 0 auto;
margin-top: 10px;
border-left: 0px;
border-right: 0px;
padding-left: 0px;
padding-right: 0px;
display: block;
background-color: black;
color: white;
font-size: 10px;
font-family: "Lucida Console", Monaco, monospace;
outline: none;
white-space: pre;
overflow-wrap: normal;
overflow-x: scroll;
}
</style>
</head>
<body>
<div id="main-container">
<div id="warning" style="display: none; padding: 1rem">
<b style="color: red; font-size: large"
>Warning: your browser does not support SharedArrayBuffer please try
open the page in a new tab.
</b>
<!-- <a target="_blank" href="https://radames-whisper-wasm.hf.space" target="_blank"
>https://radames-whisper-wasm.hf.space</a
> -->
</div>
<b
>Minimal
<a target="_blank" href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a>
example running fully in the browser</b
>
<br /><br />
Usage instructions:<br />
<ul>
<li>
Load a ggml model file (you can obtain one from
<a target="_blank" href="https://ggml.ggerganov.com/">here</a>, recommended:
<b>tiny</b> or <b>base</b>)
</li>
<li>
Select audio file to transcribe or record audio from the microphone
(sample: <a target="_blank" href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)
</li>
<li>Click on the "Transcribe" button to start the transcription</li>
</ul>
Note that the computation is quite heavy and may take a few seconds to
complete.<br />
The transcription results will be displayed in the text area below.<br /><br />
<b>Important:</b>
<ul>
<li>
your browser must support WASM SIMD instructions for this to work
</li>
<li>
Firefox cannot load files larger than 256 MB - use Chrome instead
</li>
</ul>
<b>More examples:</b>
<a target="_blank" href="https://whisper.ggerganov.com/">main</a> |
<a target="_blank" href="https://whisper.ggerganov.com/bench">bench</a> |
<a target="_blank" href="https://whisper.ggerganov.com/stream">stream</a> |
<a target="_blank" href="https://whisper.ggerganov.com/command">command</a> |
<a target="_blank" href="https://whisper.ggerganov.com/talk">talk</a> |
<hr />
<div id="model">
Whisper models: <span id="model-whisper-status"></span><br /><br />
<button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">
tiny.en (75 MB)
</button>
<button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">
tiny (75 MB)
</button>
<button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">
base.en (142 MB)
</button>
<button id="fetch-whisper-base" onclick="loadWhisper('base')">
base (142 MB)
</button>
<button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">
small.en (466 MB)
</button>
<button id="fetch-whisper-small" onclick="loadWhisper('small')">
small (466 MB)
</button>
<input
type="file"
id="whisper-file"
name="file"
onchange="loadFile(event, 'whisper.bin')"
/>
<br /><br />
Quantized models:<br /><br />
<button
id="fetch-whisper-tiny-en-q5_1"
onclick="loadWhisper('tiny-en-q5_1')"
>
tiny.en (Q5_1, 31 MB)
</button>
<button id="fetch-whisper-tiny-q5_1" onclick="loadWhisper('tiny-q5_1')">
tiny (Q5_1, 31 MB)
</button>
<button
id="fetch-whisper-base-en-q5_1"
onclick="loadWhisper('base-en-q5_1')"
>
base.en (Q5_1, 57 MB)
</button>
<button id="fetch-whisper-base-q5_1" onclick="loadWhisper('base-q5_1')">
base (Q5_1, 57 MB)
</button>
<button
id="fetch-whisper-small-en-q5_1"
onclick="loadWhisper('small-en-q5_1')"
>
small.en (Q5_1, 182 MB)
</button>
<button
id="fetch-whisper-small-q5_1"
onclick="loadWhisper('small-q5_1')"
>
small (Q5_1, 182 MB)</button
><br />
<button
id="fetch-whisper-medium-en-q5_0"
onclick="loadWhisper('medium-en-q5_0')"
>
medium.en (Q5_0, 515 MB)
</button>
<button
id="fetch-whisper-medium-q5_0"
onclick="loadWhisper('medium-q5_0')"
>
medium (Q5_0, 515 MB)
</button>
<button
id="fetch-whisper-large-q5_0"
onclick="loadWhisper('large-q5_0')"
>
large (Q5_0, 1030 MB)
</button>
<span id="fetch-whisper-progress"></span>
</div>
<br />
<!-- radio button to select between file upload or microphone -->
<div id="input">
Input:
<input
type="radio"
id="file"
name="input"
value="file"
checked="checked"
onchange="changeInput('file')"
/>
<label for="file">File</label>
<input
type="radio"
id="mic"
name="input"
value="mic"
onchange="changeInput('mic')"
/>
<label for="mic">Microphone</label>
</div>
<br />
<div id="input_file">
Audio file:
<input type="file" id="file" name="file" onchange="loadAudio(event)" />
</div>
<div id="input_mic" style="display: none">
Microphone:
<button id="start" onclick="startRecording()">Start</button>
<button id="stop" onclick="stopRecording()" disabled>Stop</button>
<!-- progress bar to show recording progress -->
<br /><br />
<div id="progress" style="display: none">
<div
id="progress-bar"
style="width: 0%; height: 10px; background-color: #4caf50"
></div>
<div id="progress-text">0%</div>
</div>
</div>
<a target="_blank"udio controls="controls" id="audio" loop hidden>
Your browser does not support the &lt;audio&gt; tag.
<source id="source" src="" type="audio/wav" />
</audio>
<hr />
<br />
<table>
<tr>
<td>
Language:
<select id="language" name="language">
<option value="en">English</option>
<option value="ar">Arabic</option>
<option value="hy">Armenian</option>
<option value="az">Azerbaijani</option>
<option value="eu">Basque</option>
<option value="be">Belarusian</option>
<option value="bn">Bengali</option>
<option value="bg">Bulgarian</option>
<option value="ca">Catalan</option>
<option value="zh">Chinese</option>
<option value="hr">Croatian</option>
<option value="cs">Czech</option>
<option value="da">Danish</option>
<option value="nl">Dutch</option>
<option value="en">English</option>
<option value="et">Estonian</option>
<option value="tl">Filipino</option>
<option value="fi">Finnish</option>
<option value="fr">French</option>
<option value="gl">Galician</option>
<option value="ka">Georgian</option>
<option value="de">German</option>
<option value="el">Greek</option>
<option value="gu">Gujarati</option>
<option value="iw">Hebrew</option>
<option value="hi">Hindi</option>
<option value="hu">Hungarian</option>
<option value="is">Icelandic</option>
<option value="id">Indonesian</option>
<option value="ga">Irish</option>
<option value="it">Italian</option>
<option value="ja">Japanese</option>
<option value="kn">Kannada</option>
<option value="ko">Korean</option>
<option value="la">Latin</option>
<option value="lv">Latvian</option>
<option value="lt">Lithuanian</option>
<option value="mk">Macedonian</option>
<option value="ms">Malay</option>
<option value="mt">Maltese</option>
<option value="no">Norwegian</option>
<option value="fa">Persian</option>
<option value="pl">Polish</option>
<option value="pt">Portuguese</option>
<option value="ro">Romanian</option>
<option value="ru">Russian</option>
<option value="sr">Serbian</option>
<option value="sk">Slovak</option>
<option value="sl">Slovenian</option>
<option value="es">Spanish</option>
<option value="sw">Swahili</option>
<option value="sv">Swedish</option>
<option value="ta">Tamil</option>
<option value="te">Telugu</option>
<option value="th">Thai</option>
<option value="tr">Turkish</option>
<option value="uk">Ukrainian</option>
<option value="ur">Urdu</option>
<option value="vi">Vietnamese</option>
<option value="cy">Welsh</option>
<option value="yi">Yiddish</option>
</select>
</td>
<!-- Slider to select number of threads between 1 and 16 -->
<td>
Threads:
<input
type="range"
id="threads"
name="threads"
min="1"
max="16"
value="8"
onchange="changeThreads(this.value)"
/>
<span id="threads-value">8</span>
</td>
<td>
<button onclick="onProcess(false);">Transcribe</button>
</td>
<td>
<button onclick="onProcess(true);">Translate</button>
</td>
</tr>
</table>
<br />
<!-- textarea with height filling the rest of the page -->
<textarea id="output" rows="20"></textarea>
<br /><br />
<div class="cell-version">
<span>
| Build time: <span class="nav-link">@GIT_DATE@</span> | Commit hash:
<a target="_blank"
class="nav-link"
href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@"
>@GIT_SHA1@</a
>
| Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
<a target="_blank"
class="nav-link"
href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm"
>Source Code</a
>
|
</span>
</div>
</div>
<script type="text/javascript" src="helpers.js"></script>
<script type="text/javascript">
document.addEventListener("DOMContentLoaded", () => {
//check is shared array buffer is supported
if (!window.SharedArrayBuffer) {
document.querySelector("#warning").style.display = "block";
}
});
// TODO: convert audio buffer to WAV
function setAudio(audio) {
//if (audio) {
// // convert to 16-bit PCM
// var blob = new Blob([audio], { type: 'audio/wav' });
// var url = URL.createObjectURL(blob);
// document.getElementById('source').src = url;
// document.getElementById('audio').hidden = false;
// document.getElementById('audio').loop = false;
// document.getElementById('audio').load();
//} else {
// document.getElementById('audio').hidden = true;
//}
}
function changeInput(input) {
if (input == "file") {
document.getElementById("input_file").style.display = "block";
document.getElementById("input_mic").style.display = "none";
document.getElementById("progress").style.display = "none";
} else {
document.getElementById("input_file").style.display = "none";
document.getElementById("input_mic").style.display = "block";
document.getElementById("progress").style.display = "block";
}
}
var Module = {
print: printTextarea,
printErr: printTextarea,
setStatus: function (text) {
printTextarea("js: " + text);
},
monitorRunDependencies: function (left) {},
};
// web audio context
var context = null;
// audio data
var audio = null;
// the whisper instance
var instance = null;
var model_whisper = "";
// helper function
function convertTypedArray(src, type) {
var buffer = new ArrayBuffer(src.byteLength);
var baseView = new src.constructor(buffer).set(src);
return new type(buffer);
}
//
// load model
//
let dbVersion = 1;
let dbName = "whisper.ggerganov.com";
let indexedDB =
window.indexedDB ||
window.mozIndexedDB ||
window.webkitIndexedDB ||
window.msIndexedDB;
function storeFS(fname, buf) {
// write to WASM file using FS_createDataFile
// if the file exists, delete it
try {
Module.FS_unlink(fname);
} catch (e) {
// ignore
}
Module.FS_createDataFile("/", fname, buf, true, true);
//model_whisper = fname;
document.getElementById("model-whisper-status").innerHTML =
'loaded "' + model_whisper + '"!';
printTextarea(
"storeFS: stored model: " + fname + " size: " + buf.length
);
document.getElementById("model").innerHTML =
"Model fetched: " + model_whisper;
}
function loadFile(event, fname) {
var file = event.target.files[0] || null;
if (file == null) {
return;
}
printTextarea(
"loadFile: loading model: " +
file.name +
", size: " +
file.size +
" bytes"
);
printTextarea("loadFile: please wait ...");
var reader = new FileReader();
reader.onload = function (event) {
var buf = new Uint8Array(reader.result);
storeFS(fname, buf);
};
reader.readAsArrayBuffer(file);
document.getElementById("fetch-whisper-tiny-en").style.display = "none";
document.getElementById("fetch-whisper-base-en").style.display = "none";
document.getElementById("fetch-whisper-small-en").style.display =
"none";
document.getElementById("fetch-whisper-tiny").style.display = "none";
document.getElementById("fetch-whisper-base").style.display = "none";
document.getElementById("fetch-whisper-small").style.display = "none";
document.getElementById("fetch-whisper-tiny-en-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-tiny-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-base-en-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-base-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-small-en-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-small-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-medium-en-q5_0").style.display =
"none";
document.getElementById("fetch-whisper-medium-q5_0").style.display =
"none";
document.getElementById("fetch-whisper-large-q5_0").style.display =
"none";
document.getElementById("whisper-file").style.display = "none";
document.getElementById("model-whisper-status").innerHTML =
"loaded model: " + file.name;
}
function loadWhisper(model) {
const baseURL =
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
let urls = {
"tiny.en": `${baseURL}/ggml-tiny.en.bin`,
tiny: `${baseURL}/ggml-tiny.bin`,
"base.en": `${baseURL}/ggml-base.en.bin`,
base: `${baseURL}/ggml-base.bin`,
"small.en": `${baseURL}/ggml-small.en.bin`,
small: `${baseURL}/ggml-small.bin`,
"tiny-en-q5_1": `${baseURL}/ggml-model-whisper-tiny.en-q5_1.bin`,
"tiny-q5_1": `${baseURL}/ggml-model-whisper-tiny-q5_1.bin`,
"base-en-q5_1": `${baseURL}/ggml-model-whisper-base.en-q5_1.bin`,
"base-q5_1": `${baseURL}/ggml-model-whisper-base-q5_1.bin`,
"small-en-q5_1": `${baseURL}/ggml-model-whisper-small.en-q5_1.bin`,
"small-q5_1": `${baseURL}/ggml-model-whisper-small-q5_1.bin`,
"medium-en-q5_0": `${baseURL}/ggml-model-whisper-medium.en-q5_0.bin`,
"medium-q5_0": `${baseURL}/ggml-model-whisper-medium-q5_0.bin`,
"large-q5_0": `${baseURL}/ggml-model-whisper-large-q5_0.bin`,
};
let sizes = {
"tiny.en": 75,
tiny: 75,
"base.en": 142,
base: 142,
"small.en": 466,
small: 466,
"tiny-en-q5_1": 31,
"tiny-q5_1": 31,
"base-en-q5_1": 57,
"base-q5_1": 57,
"small-en-q5_1": 182,
"small-q5_1": 182,
"medium-en-q5_0": 515,
"medium-q5_0": 515,
"large-q5_0": 1030,
};
let url = urls[model];
let dst = "whisper.bin";
let size_mb = sizes[model];
model_whisper = model;
document.getElementById("fetch-whisper-tiny-en").style.display = "none";
document.getElementById("fetch-whisper-base-en").style.display = "none";
document.getElementById("fetch-whisper-small-en").style.display =
"none";
document.getElementById("fetch-whisper-tiny").style.display = "none";
document.getElementById("fetch-whisper-base").style.display = "none";
document.getElementById("fetch-whisper-small").style.display = "none";
document.getElementById("fetch-whisper-tiny-en-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-tiny-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-base-en-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-base-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-small-en-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-small-q5_1").style.display =
"none";
document.getElementById("fetch-whisper-medium-en-q5_0").style.display =
"none";
document.getElementById("fetch-whisper-medium-q5_0").style.display =
"none";
document.getElementById("fetch-whisper-large-q5_0").style.display =
"none";
document.getElementById("whisper-file").style.display = "none";
document.getElementById("model-whisper-status").innerHTML =
"loading model: " + model;
cbProgress = function (p) {
let el = document.getElementById("fetch-whisper-progress");
el.innerHTML = Math.round(100 * p) + "%";
};
cbCancel = function () {
var el;
el = document.getElementById("fetch-whisper-tiny-en");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-base-en");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-small-en");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-tiny");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-base");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-small");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-tiny-en-q5_1");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-tiny-q5_1");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-base-en-q5_1");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-base-q5_1");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-small-en-q5_1");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-small-q5_1");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-medium-en-q5_0");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-medium-q5_0");
if (el) el.style.display = "inline-block";
el = document.getElementById("fetch-whisper-large-q5_0");
if (el) el.style.display = "inline-block";
el = document.getElementById("whisper-file");
if (el) el.style.display = "inline-block";
el = document.getElementById("model-whisper-status");
if (el) el.innerHTML = "";
};
loadRemote(
url,
dst,
size_mb,
cbProgress,
storeFS,
cbCancel,
printTextarea
);
}
//
// audio file
//
const kMaxAudio_s = 30 * 60;
const kMaxRecording_s = 2 * 60;
const kSampleRate = 16000;
window.AudioContext = window.AudioContext || window.webkitAudioContext;
window.OfflineAudioContext =
window.OfflineAudioContext || window.webkitOfflineAudioContext;
function loadAudio(event) {
if (!context) {
context = new AudioContext({
sampleRate: kSampleRate,
channelCount: 1,
echoCancellation: false,
autoGainControl: true,
noiseSuppression: true,
});
}
var file = event.target.files[0] || null;
if (file == null) {
return;
}
printTextarea(
"js: loading audio: " + file.name + ", size: " + file.size + " bytes"
);
printTextarea("js: please wait ...");
var reader = new FileReader();
reader.onload = function (event) {
var buf = new Uint8Array(reader.result);
context.decodeAudioData(
buf.buffer,
function (audioBuffer) {
var offlineContext = new OfflineAudioContext(
audioBuffer.numberOfChannels,
audioBuffer.length,
audioBuffer.sampleRate
);
var source = offlineContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(offlineContext.destination);
source.start(0);
offlineContext.startRendering().then(function (renderedBuffer) {
audio = renderedBuffer.getChannelData(0);
printTextarea("js: audio loaded, size: " + audio.length);
// truncate to first 30 seconds
if (audio.length > kMaxAudio_s * kSampleRate) {
audio = audio.slice(0, kMaxAudio_s * kSampleRate);
printTextarea(
"js: truncated audio to first " + kMaxAudio_s + " seconds"
);
}
setAudio(audio);
});
},
function (e) {
printTextarea("js: error decoding audio: " + e);
audio = null;
setAudio(audio);
}
);
};
reader.readAsArrayBuffer(file);
}
//
// microphone
//
var mediaRecorder = null;
var doRecording = false;
var startTime = 0;
function stopRecording() {
doRecording = false;
}
// record up to kMaxRecording_s seconds of audio from the microphone
// check if doRecording is false every 1000 ms and stop recording if so
// update progress information
function startRecording() {
if (!context) {
context = new AudioContext({
sampleRate: kSampleRate,
channelCount: 1,
echoCancellation: false,
autoGainControl: true,
noiseSuppression: true,
});
}
document.getElementById("start").disabled = true;
document.getElementById("stop").disabled = false;
document.getElementById("progress-bar").style.width = "0%";
document.getElementById("progress-text").innerHTML = "0%";
doRecording = true;
startTime = Date.now();
var chunks = [];
var stream = null;
navigator.mediaDevices
.getUserMedia({ audio: true, video: false })
.then(function (s) {
stream = s;
mediaRecorder = new MediaRecorder(stream);
mediaRecorder.ondataavailable = function (e) {
chunks.push(e.data);
};
mediaRecorder.onstop = function (e) {
var blob = new Blob(chunks, { type: "audio/ogg; codecs=opus" });
chunks = [];
document.getElementById("start").disabled = false;
document.getElementById("stop").disabled = true;
var reader = new FileReader();
reader.onload = function (event) {
var buf = new Uint8Array(reader.result);
context.decodeAudioData(
buf.buffer,
function (audioBuffer) {
var offlineContext = new OfflineAudioContext(
audioBuffer.numberOfChannels,
audioBuffer.length,
audioBuffer.sampleRate
);
var source = offlineContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(offlineContext.destination);
source.start(0);
offlineContext
.startRendering()
.then(function (renderedBuffer) {
audio = renderedBuffer.getChannelData(0);
printTextarea(
"js: audio recorded, size: " + audio.length
);
// truncate to first 30 seconds
if (audio.length > kMaxRecording_s * kSampleRate) {
audio = audio.slice(0, kMaxRecording_s * kSampleRate);
printTextarea(
"js: truncated audio to first " +
kMaxRecording_s +
" seconds"
);
}
setAudio(audio);
});
},
function (e) {
printTextarea("js: error decoding audio: " + e);
audio = null;
setAudio(audio);
}
);
};
reader.readAsArrayBuffer(blob);
};
mediaRecorder.start();
})
.catch(function (err) {
printTextarea("js: error getting audio stream: " + err);
});
var interval = setInterval(function () {
if (!doRecording) {
clearInterval(interval);
mediaRecorder.stop();
stream.getTracks().forEach(function (track) {
track.stop();
});
}
document.getElementById("progress-bar").style.width =
(100 * (Date.now() - startTime)) / 1000 / kMaxRecording_s + "%";
document.getElementById("progress-text").innerHTML =
((100 * (Date.now() - startTime)) / 1000 / kMaxRecording_s).toFixed(
0
) + "%";
}, 1000);
printTextarea("js: recording ...");
setTimeout(function () {
if (doRecording) {
printTextarea(
"js: recording stopped after " + kMaxRecording_s + " seconds"
);
stopRecording();
}
}, kMaxRecording_s * 1000);
}
//
// transcribe
//
var nthreads = 8;
function changeThreads(value) {
nthreads = value;
document.getElementById("threads-value").innerHTML = nthreads;
}
function onProcess(translate) {
if (!instance) {
instance = Module.init("whisper.bin");
if (instance) {
printTextarea("js: whisper initialized, instance: " + instance);
document.getElementById("model").innerHTML =
"Model loaded: " + model_whisper;
}
}
if (!instance) {
printTextarea("js: failed to initialize whisper");
return;
}
if (!audio) {
printTextarea("js: no audio data");
return;
}
if (instance) {
printTextarea("");
printTextarea("js: processing - this might take a while ...");
printTextarea("");
setTimeout(function () {
var ret = Module.full_default(
instance,
audio,
document.getElementById("language").value,
nthreads,
translate
);
console.log("js: full_default returned: " + ret);
if (ret) {
printTextarea("js: whisper returned: " + ret);
}
}, 100);
}
}
</script>
<script type="text/javascript" src="main.js"></script>
</body>
</html>