"""Gradio chat demo for the Aira-Instruct-PT-560M BLOOM model."""
import os

import gradio as gr
from transformers import BloomForCausalLM, BloomTokenizerFast

MODEL_ID = "nicholasKluge/Aira-Instruct-PT-560M"

# SECURITY: the access token used to be hard-coded here as a string literal.
# It is now read from the environment so the secret never lives in source
# control. The previously committed token should be revoked. When the variable
# is unset, None is passed, which is the default for `use_auth_token`.
HF_TOKEN = os.environ.get("HF_TOKEN")

tokenizer = BloomTokenizerFast.from_pretrained(MODEL_ID, use_auth_token=HF_TOKEN)
model = BloomForCausalLM.from_pretrained(MODEL_ID, use_auth_token=HF_TOKEN)

# NOTE(review): `title` is not referenced anywhere in the visible code; kept
# in case something outside this file reads it.
title = "AIRA Demo 🤓"

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear Conversation")

    def respond(message, chat_history):
        """Generate a model reply to *message* and append it to the chat.

        Args:
            message: Text submitted through the textbox.
            chat_history: List of ``(user_text, bot_text)`` pairs currently
                held by the chatbot component; ``None`` is treated as empty.

        Returns:
            A ``("", chat_history)`` tuple: the empty string clears the
            textbox and the updated history refreshes the chatbot.
        """
        if chat_history is None:
            chat_history = []

        inputs = tokenizer(
            tokenizer.bos_token + message + tokenizer.eos_token,
            return_tensors="pt",
        )
        # `early_stopping` was dropped: it only affects beam search, and this
        # call samples without beams.
        output_ids = model.generate(
            **inputs,
            bos_token_id=tokenizer.bos_token_id,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_k=50,
            max_length=200,
            top_p=0.95,
            temperature=0.7,
            num_return_sequences=1,
        )

        # The generated sequence starts with the prompt tokens. Slice them off
        # by length instead of string-replacing the message out of the decoded
        # text, which would also delete any place where the reply itself
        # repeats the user's words.
        prompt_length = inputs["input_ids"].shape[-1]
        reply = tokenizer.decode(
            output_ids[0][prompt_length:],
            skip_special_tokens=True,
        )

        chat_history.append((f"👤 {message}", f"🤖 {reply}"))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # Returning None resets the chatbot component.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()