hassanelmghari committed efae69e (verified) · Parent(s): 04fcd0b

Update app.py

Files changed (1):
  app.py (+76, -47)
--- a/app.py
+++ b/app.py
@@ -31,83 +31,112 @@ def encode_image(image_path, max_size=(800, 800), quality=85):
     img.save(buffered, format="JPEG", quality=quality)
     return base64.b64encode(buffered.getvalue()).decode('utf-8')
 
-def bot_streaming(message, history, together_api_key, max_new_tokens=250, max_history=5):
+def bot_streaming(message, history, together_api_key, max_new_tokens=250, temperature=0.7, max_history=5):
     if client is None:
-        initialize_client(together_api_key)
+        try:
+            initialize_client(together_api_key)
+        except Exception as e:
+            yield f"Error initializing client: {str(e)}"
+            return
 
-    txt = message["text"]
+    txt = message.get("text", "")
     messages = []
     images = []
 
-    for i, msg in enumerate(history[-max_history:]):
-        if isinstance(msg[0], tuple):
-            messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(msg[0][0])}"}}]})
-            messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
-        elif isinstance(history[i-1], tuple) and isinstance(msg[0], str):
-            pass
-        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):
-            messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
-            messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
-
-    if len(message["files"]) == 1:
-        if isinstance(message["files"][0], str): # examples
-            image_path = message["files"][0]
-        else: # regular input
-            image_path = message["files"][0]["path"]
-        messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}}]})
-    else:
-        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
-
-    try:
+    try:
+        for i, msg in enumerate(history[-max_history:]):
+            if isinstance(msg[0], tuple):
+                messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(msg[0][0])}"}}]})
+                messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
+            elif isinstance(history[i-1][0], tuple) and isinstance(msg[0], str):
+                pass
+            elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):
+                messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
+                messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
+
+        if "files" in message and len(message["files"]) == 1:
+            if isinstance(message["files"][0], str): # examples
+                image_path = message["files"][0]
+            else: # regular input
+                image_path = message["files"][0]["path"]
+            messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}}]})
+        else:
+            messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
+
         stream = client.chat.completions.create(
             model="meta-llama/Llama-Vision-Free",
             messages=messages,
             max_tokens=max_new_tokens,
+            temperature=temperature,
             stream=True,
         )
 
         buffer = ""
         for chunk in stream:
-            if chunk.choices[0].delta.content is not None:
+            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content is not None:
                 buffer += chunk.choices[0].delta.content
                 time.sleep(0.01)
                 yield buffer
 
+        if not buffer:
+            yield "No response generated. Please try again."
+
     except Exception as e:
         if "Request Entity Too Large" in str(e):
             yield "The image is too large. Please try with a smaller image or compress the existing one."
         else:
             yield f"An error occurred: {str(e)}"
 
-with gr.Blocks() as demo:
+css = """
+#chatbot-container {
+    height: calc(100vh - 230px);
+    overflow-y: auto;
+}
+#chatbot-textbox {
+    position: fixed;
+    bottom: 20px;
+    left: 20px;
+    right: 20px;
+}
+"""
+
+with gr.Blocks(css=css) as demo:
     gr.Markdown("# Meta Llama-3.2-11B-Vision-Instruct (FREE)")
-    gr.Markdown("Try the new Llama 3.2 11B Vision API by Meta for free through Together AI. Upload an image, and start chatting about it. Just paste in your [Together AI API key](https://api.together.xyz/settings/api-keys) and get started!")
+    gr.Markdown("Try the new Llama 3.2 11B Vision API by Meta for free through Together AI. Upload an image, and start chatting about it. Just paste in your Together AI API key and get started!")
 
-    together_api_key = gr.Textbox(
-        label="Together API Key",
-        placeholder="Enter your TOGETHER_API_KEY here",
-        type="password"
-    )
+    with gr.Row():
+        together_api_key = gr.Textbox(
+            label="Together API Key",
+            placeholder="Enter your TOGETHER_API_KEY here",
+            type="password"
+        )
 
-    chatbot = gr.ChatInterface(
-        fn=bot_streaming,
-        textbox=gr.MultimodalTextbox(),
-        additional_inputs=[
-            gr.Slider(
-                minimum=10,
-                maximum=500,
-                value=250,
-                step=10,
-                label="Maximum number of new tokens to generate",
-            )
-        ],
-        cache_examples=False,
-        stop_btn="Stop Generation",
-        fill_height=True,
-        multimodal=True
-    )
-
-    together_api_key.change(lambda x: x, inputs=[together_api_key], outputs=[chatbot.additional_inputs[0]])
+    with gr.Row():
+        max_new_tokens = gr.Slider(
+            minimum=10,
+            maximum=500,
+            value=250,
+            step=10,
+            label="Maximum number of new tokens",
+        )
+        temperature = gr.Number(
+            value=0.7,
+            minimum=0,
+            maximum=1,
+            step=0.1,
+            label="Temperature"
+        )
 
+    with gr.Column(elem_id="chatbot-container"):
+        chatbot = gr.ChatInterface(
+            fn=bot_streaming,
+            textbox=gr.MultimodalTextbox(elem_id="chatbot-textbox"),
+            additional_inputs=[together_api_key, max_new_tokens, temperature],
+            cache_examples=False,
+            stop_btn="Stop Generation",
+            fill_height=True,
+            multimodal=True
+        )
 
 if __name__ == "__main__":
     demo.launch(debug=True)
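
Note: neither side of the hunk defines initialize_client or the module-level client it is expected to populate; both live above line 31 of app.py. A minimal sketch of what that helper plausibly looks like, assuming the official together Python SDK (whose Together client exposes the chat.completions.create(..., stream=True) call used above); the body here is an assumption, not the committed code:

# Hypothetical sketch -- the real definition sits outside this hunk.
# Assumes the official Together SDK: pip install together
from together import Together

client = None  # module-level client checked by bot_streaming

def initialize_client(api_key):
    """Create the global Together client from the user-supplied key."""
    global client
    if not api_key:
        # bot_streaming would surface this as "Error initializing client: ..."
        raise ValueError("Please provide a Together API key.")
    client = Together(api_key=api_key)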
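
For reference, the isinstance checks in the history loop only line up with Gradio's tuple-style chat history, where an image upload arrives as a (filepath,) tuple and a plain text turn as a string. A sketch of the inputs bot_streaming expects; the paths and key below are placeholders, and calling the generator directly requires a valid key:

# Illustrative shapes only, inferred from the isinstance checks in the diff.
message = {
    "text": "What breed is this dog?",
    "files": ["/tmp/dog.jpg"],  # placeholder path (str, as in the examples branch)
}
history = [
    [("/tmp/earlier.jpg",), None],                   # image turn: msg[0] is a tuple
    ["Describe the image.", "A corgi on a beach."],  # text turn: msg[0] is a str
]

# gr.ChatInterface consumes the generator the same way:
for partial in bot_streaming(message, history, together_api_key="YOUR_KEY"):
    print(partial)

One caveat worth flagging: i indexes the history[-max_history:] slice while history[i+1] indexes the full list, so the two lookups are only aligned while len(history) <= max_history.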