Spaces:

KasKniesmeijer
/

FAAM-demo

Sleeping

KasKniesmeijer commited on 26 days ago

Commit

cab1df1

•

1 Parent(s): 7c0f537

Add SmolVLM with WebGPU frontend

Files changed (5) hide show

app.py ADDED Viewed

+import gradio as gr
+import torch
+from transformers import AutoProcessor, AutoModelForVision2Seq
+# Set the device (CPU or CUDA)
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Initialize processor and model
+processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
+model = AutoModelForVision2Seq.from_pretrained(
+    "HuggingFaceTB/SmolVLM-Instruct",
+    torch_dtype=torch.bfloat16,
+    _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
+).to(DEVICE)
+# Define the function to answer questions
+def answer_question(image, question):
+    inputs = processor(images=image, text=question, return_tensors="pt").to(DEVICE)
+    outputs = model.generate(**inputs)
+    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+    return answer
+# Gradio interface
+interface = gr.Interface(
+    fn=answer_question,
+    inputs=["image", "text"],
+    outputs="text",
+    title="SmolVLM - Vision-Language Question Answering",
+    description="Upload an image and ask a question to get an answer powered by SmolVLM.",
+)
+if __name__ == "__main__":
+    interface.launch()

index.html CHANGED Viewed

@@ -4,12 +4,21 @@
 <head>
 	<meta charset="UTF-8">
 	<meta name="viewport" content="width=device-width, initial-scale=1.0">
-	<title>WebGPU Hugging Face Space</title>
 	<link rel="stylesheet" href="styles.css">
 </head>
 <body>
-	<canvas id="webgpu-canvas"></canvas>
 	<script type="module" src="./src/main.js"></script>
 </body>

 <head>
 	<meta charset="UTF-8">
 	<meta name="viewport" content="width=device-width, initial-scale=1.0">
+	<title>SmolVLM WebGPU</title>
 	<link rel="stylesheet" href="styles.css">
 </head>
 <body>
+	<h1>SmolVLM - Vision-Language Question Answering</h1>
+	<div id="app">
+		<canvas id="webgpu-canvas"></canvas>
+		<div id="controls">
+			<input type="file" id="image-upload" accept="image/*">
+			<input type="text" id="question" placeholder="Ask a question about the image">
+			<button id="submit-btn">Submit</button>
+		</div>
+		<div id="answer">Answer will appear here</div>
+	</div>
 	<script type="module" src="./src/main.js"></script>
 </body>

requirements.txt ADDED Viewed

+torch
+transformers
+gradio

src/main.js CHANGED Viewed

@@ -1,10 +1,11 @@
-async function initWebGPU() {
     if (!navigator.gpu) {
         document.body.innerHTML = "<p>Your browser does not support WebGPU.</p>";
         return;
     }
-    const canvas = document.getElementById("webgpu-canvas");
     const adapter = await navigator.gpu.requestAdapter();
     const device = await adapter.requestDevice();
     const context = canvas.getContext("webgpu");
@@ -15,7 +16,37 @@ async function initWebGPU() {
         alphaMode: "opaque",
     });
-    console.log("WebGPU initialized successfully!");
 }
-initWebGPU();

+async function initializeWebGPU() {
+    const canvas = document.getElementById("webgpu-canvas");
     if (!navigator.gpu) {
         document.body.innerHTML = "<p>Your browser does not support WebGPU.</p>";
         return;
     }
     const adapter = await navigator.gpu.requestAdapter();
     const device = await adapter.requestDevice();
     const context = canvas.getContext("webgpu");
         alphaMode: "opaque",
     });
+    console.log("WebGPU initialized.");
+}
+// Submit the image and question to the backend
+async function submitQuestion(imageFile, question) {
+    const formData = new FormData();
+    formData.append("image", imageFile);
+    formData.append("text", question);
+    const response = await fetch("/predict", {
+        method: "POST",
+        body: formData,
+    });
+    if (!response.ok) {
+        console.error("Failed to get a response:", response.statusText);
+        return "Error: Unable to fetch the answer.";
+    }
+    const result = await response.json();
+    return result.data[0];
 }
+// Handle user interactions
+document.getElementById("submit-btn").addEventListener("click", async () => {
+    const imageFile = document.getElementById("image-upload").files[0];
+    const question = document.getElementById("question").value;
+    const answer = await submitQuestion(imageFile, question);
+    document.getElementById("answer").innerText = `Answer: ${answer}`;
+});
+// Initialize WebGPU when the page loads
+initializeWebGPU();

style.css CHANGED Viewed

@@ -1,16 +1,39 @@
 body {
 	margin: 0;
 	display: flex;
-	justify-content: center;
 	align-items: center;
-	height: 100vh;
-	background: #222;
-	color: white;
-	font-family: Arial, sans-serif;
 }
 canvas {
 	width: 800px;
 	height: 600px;
-	border: 1px solid #fff;
 }

 body {
+	font-family: Arial, sans-serif;
+	background: #222;
+	color: white;
+	text-align: center;
 	margin: 0;
+	padding: 0;
+}
+h1 {
+	margin: 20px;
+}
+#app {
 	display: flex;
+	flex-direction: column;
 	align-items: center;
+	margin: 20px;
 }
 canvas {
 	width: 800px;
 	height: 600px;
+	margin: 20px 0;
+	border: 2px solid white;
+}
+#controls {
+	display: flex;
+	flex-direction: column;
+	align-items: center;
+	gap: 10px;
+}
+#answer {
+	margin-top: 20px;
+	font-size: 1.2em;
+	color: #0f0;
 }