from io import BytesIO import string import gradio as gr import requests from utils import Endpoint, get_token def encode_image(image): buffered = BytesIO() image.save(buffered, format="JPEG") buffered.seek(0) return buffered def query_chat_api( image, prompt, decoding_method, temperature, len_penalty, repetition_penalty ): url = endpoint.url headers = { "User-Agent": "BLIP-2 HuggingFace Space", "Auth-Token": get_token(), } data = { "prompt": prompt, "use_nucleus_sampling": decoding_method == "Nucleus sampling", "temperature": temperature, "length_penalty": len_penalty, "repetition_penalty": repetition_penalty, } image = encode_image(image) files = {"image": image} response = requests.post(url, data=data, files=files, headers=headers) if response.status_code == 200: return response.json() else: return "Error: " + response.text def query_caption_api( image, decoding_method, temperature, len_penalty, repetition_penalty ): url = endpoint.url # replace /generate with /caption url = url.replace("/generate", "/caption") headers = { "User-Agent": "BLIP-2 HuggingFace Space", "Auth-Token": get_token(), } data = { "use_nucleus_sampling": decoding_method == "Nucleus sampling", "temperature": temperature, "length_penalty": len_penalty, "repetition_penalty": repetition_penalty, } image = encode_image(image) files = {"image": image} response = requests.post(url, data=data, files=files, headers=headers) if response.status_code == 200: return response.json() else: return "Error: " + response.text def postprocess_output(output): # if last character is not a punctuation, add a full stop if not output[0][-1] in string.punctuation: output[0] += "." return output def inference_chat( image, text_input, decoding_method, temperature, length_penalty, repetition_penalty, history=[], ): text_input = text_input history.append(text_input) prompt = " ".join(history) print(prompt) output = query_chat_api( image, prompt, decoding_method, temperature, length_penalty, repetition_penalty ) output = postprocess_output(output) history += output chat = [ (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2) ] # convert to tuples of list return {chatbot: chat, state: history} def inference_caption( image, decoding_method, temperature, length_penalty, repetition_penalty, ): output = query_caption_api( image, decoding_method, temperature, length_penalty, repetition_penalty ) return output[0] title = """
Disclaimer: This is a research prototype and is not intended for production use. No data including but not restricted to text and images is collected.
""" article = """Paper: BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models