File size: 3,443 Bytes
9bad34e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<script src="https://cdn.tailwindcss.com"></script>
<!-- polyfill for firefox + import maps -->
<script src="https://unpkg.com/es-module-shims@1.7.0/dist/es-module-shims.js"></script>
<script type="importmap">
{
"imports": {
"@huggingface/inference": "https://cdn.jsdelivr.net/npm/@huggingface/inference@2.6.4/+esm"
}
}
</script>
</head>
<body>
<form class="w-[90%] mx-auto pt-8" onsubmit="launch(); return false;">
<h1 class="text-3xl font-bold">
<span
class="bg-clip-text text-transparent bg-gradient-to-r from-pink-500 to-violet-500"
>
Image to text demo with
<a href="https://github.com/huggingface/huggingface.js">
<kbd>@huggingface/inference</kbd>
</a>
</span>
</h1>
<p class="mt-8">
First, input your token if you have one! Otherwise, you may encounter
rate limiting. You can create a token for free at
<a
target="_blank"
href="https://huggingface.co/settings/tokens"
class="underline text-blue-500"
>hf.co/settings/tokens</a
>
</p>
<input
type="text"
id="token"
class="rounded border-2 border-blue-500 shadow-md px-3 py-2 w-96 mt-6"
placeholder="token (optional)"
/>
<p class="mt-8">
Pick the model you want to run. Check out over 100 models for image to text
<a
href="https://huggingface.co/tasks/image-to-text"
class="underline text-blue-500"
target="_blank"
>
here</a
>. The default model is for image captioning, but other models can handle tasks such as text extraction.
</p>
<!-- Default model: https://huggingface.co/nlpconnect/vit-gpt2-image-captioning -->
<input
type="text"
id="model"
class="rounded border-2 border-blue-500 shadow-md px-3 py-2 w-96 mt-6"
value="nlpconnect/vit-gpt2-image-captioning"
required
/>
<p class="mt-8">Finally, choose the input image.</p>
<input type="file" required accept="image/*"
class="rounded border-blue-500 shadow-md px-3 py-2 w-96 mt-6 block"
id="prompt"
/>
<button
id="submit"
class="my-8 bg-green-500 rounded py-3 px-5 text-white shadow-md disabled:bg-slate-300"
>
Run
</button>
<p class="text-gray-400 text-sm">Output logs</p>
<div id="logs" class="bg-gray-100 rounded p-3 mb-8 text-sm">
Output will be here
</div>
<p>Check out the <a class="underline text-blue-500" href="https://huggingface.co/spaces/huggingfacejs/image-to-text/blob/main/index.html" target="_blank">source code</a></p>
</form>
<script type="module">
import { HfInference } from "@huggingface/inference";
// Re-entrancy guard: true while an inference request is in flight, so a
// second submit is ignored until the first one settles.
let running = false;

/**
 * Form submit handler: sends the selected image to the chosen model through
 * `hf.imageToText` and writes the returned `generated_text` into `#logs`.
 *
 * Reads the `#token`, `#model` and `#prompt` inputs, and disables `#submit`
 * for the duration of the request. Failures are reported with `alert`; the
 * function itself never rejects for errors raised inside the request.
 *
 * @returns {Promise<void>} Settles once the request has finished or failed.
 */
async function launch() {
  if (running) {
    return;
  }
  running = true;
  const submitButton = document.getElementById("submit");
  try {
    // The button carries a `disabled:` style; use it as the in-progress cue.
    submitButton.disabled = true;

    const token = document.getElementById("token").value.trim();
    const model = document.getElementById("model").value.trim();
    const image = document.getElementById("prompt").files[0];
    // `launch` is also reachable via `window.launch`, bypassing the form's
    // `required` validation, so check for the file here as well.
    if (!image) {
      throw new Error("Please select an image first");
    }

    // An empty token field means "no token": pass undefined, not "".
    const hf = new HfInference(token || undefined);
    const logs = document.getElementById("logs");
    logs.textContent = "";
    const { generated_text: caption } = await hf.imageToText({
      model,
      data: image,
    });
    logs.textContent = caption;
  } catch (err) {
    // Non-Error throwables have no `.message`; fall back to their string form.
    const reason = err instanceof Error ? err.message : String(err);
    alert(`Error: ${reason}`);
  } finally {
    // Reset the guard first so a failure below cannot leave it stuck.
    running = false;
    submitButton.disabled = false;
  }
}
window.launch = launch;
</script>
</body>
</html> |