from __future__ import annotations

import subprocess
import sys

# Cobra's Mamba backbone needs these compiled kernels; install them before
# importing the package (the original script installed them after the import,
# which fails on a clean environment).
subprocess.check_call([sys.executable, "-m", "pip", "install", "mamba-ssm"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "causal-conv1d"])

import gradio as gr
import spaces
import torch
from PIL import Image

from cobra import load

# Load the Cobra VLM and move it to the GPU in bfloat16 when one is available.
vlm = load("cobra+3b")

if torch.cuda.is_available():
    DEVICE = "cuda"
    DTYPE = torch.bfloat16
else:
    DEVICE = "cpu"
    DTYPE = torch.float32

vlm.to(DEVICE, dtype=DTYPE)


@spaces.GPU
def bot_streaming(message, history):
    # Use the image uploaded this turn; otherwise fall back to the most recent
    # image from earlier turns (kept inside tuples in the Gradio history).
    image = None
    if message["files"]:
        image = message["files"][-1]["path"]
    else:
        for hist in history:
            if isinstance(hist[0], tuple):
                image = hist[0][0]

    if image is None:
        raise gr.Error("Please upload an image to chat about.")

    image = Image.open(image).convert("RGB")

    # Build the prompt fresh for every request so turns from one session don't
    # leak into another (the original mutated a module-level prompt builder),
    # and pass only the text of the message, not the whole Gradio dict.
    prompt_builder = vlm.get_prompt_builder()
    prompt_builder.add_turn(role="human", message=message["text"])
    prompt_text = prompt_builder.get_prompt()

    # Generate from the VLM (greedy decoding; temperature is ignored when
    # do_sample=False).
    generated_text = vlm.generate(
        image,
        prompt_text,
        cg=True,
        do_sample=False,
        temperature=1.0,
        max_new_tokens=2048,
    )
    prompt_builder.add_turn(role="gpt", message=generated_text)

    yield generated_text


demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Cobra",
    examples=[
        {"text": "What is on the flower?", "files": ["./bee.jpg"]},
        {"text": "How to make this pastry?", "files": ["./baklava.png"]},
    ],
    description="Try Cobra in this demo. Upload an image and start chatting about it, or simply try one of the examples below.",
    stop_btn="Stop Generation",
    multimodal=True,
)
demo.launch(debug=True)
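
# ----------------------------------------------------------------------------
# Optional: token-by-token streaming sketch. The original draft left
# TextIteratorStreamer code commented out and spawned a generation thread it
# never consumed; the intent below is preserved as a minimal sketch. It
# assumes Cobra's `vlm.generate` forwards a Hugging Face `streamer` kwarg to
# the underlying LM and that a tokenizer is reachable as
# `vlm.llm_backbone.tokenizer` -- both are assumptions to verify against the
# cobra API before enabling.
#
# from threading import Thread
# from transformers import TextIteratorStreamer
#
# def stream_generate(image, prompt_text):
#     streamer = TextIteratorStreamer(
#         vlm.llm_backbone.tokenizer,  # hypothetical tokenizer handle
#         skip_prompt=True,
#         skip_special_tokens=True,
#     )
#     kwargs = dict(cg=True, do_sample=False, max_new_tokens=2048, streamer=streamer)
#     Thread(target=vlm.generate, args=(image, prompt_text), kwargs=kwargs).start()
#     buffer = ""
#     for new_text in streamer:  # yields decoded text chunks as they arrive
#         buffer += new_text
#         yield buffer
# ----------------------------------------------------------------------------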