import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model and tokenizer
model_name = "Spestly/AwA-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

# Set to evaluation mode
model.eval()

# Sampling settings passed to model.generate() for every reply.
_MAX_NEW_TOKENS = 1300
_TEMPERATURE = 0.7
_TOP_P = 0.9


def _build_prompt(message):
    """Wrap the user's message in the fixed instruction/response template."""
    return (
        "You are an LLM called AwA. Anthropic does NOT train you. "
        "You are a Qwen 2.5 fine-tune. Your purpose is the help the user accomplish their request to the best of your abilities. "
        "Below is an instruction that describes a task. Answer it clearly and concisely. Don't overthink answers, but don't underthink them aswell.\n\n"
        f"### Instruction:\n{message}\n\n### Response:"
    )


def generate_response(message, history):
    """Generate the model's reply to a single chat message.

    Args:
        message: The text the user just submitted.
        history: Earlier turns of the conversation. It is accepted because
            this function is passed to ``gr.ChatInterface`` below, but it is
            not used: each reply is generated from ``message`` alone.

    Returns:
        The newly generated text, with special tokens removed and
        surrounding whitespace stripped.
    """
    inputs = tokenizer(_build_prompt(message), return_tensors="pt")
    # Remember how long the prompt is so it can be cut off the output below.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=_MAX_NEW_TOKENS,
            num_return_sequences=1,
            temperature=_TEMPERATURE,
            top_p=_TOP_P,
            do_sample=True,
        )

    # The returned sequence starts with the prompt tokens. Decode only what
    # follows them instead of splitting the decoded text on "### Response:",
    # which would drop part of the answer whenever the model itself emits
    # that marker.
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


iface = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(height=600, type="messages"),
    textbox=gr.Textbox(
        placeholder="Type your message here...",
        container=False,
        scale=7,
    ),
    title="AwA-0.5B 🔎 - Experimental",
    description="Chat with AwA (Answers with Athena). Please note that since AwA is an experimental model, some outputs may not be accurate/expected!",
    theme="ocean",
    examples=[
        "How can CRISPR help us Humans?",
        "What are some important ethics in AI",
        "How does Quantum Physics work?",
    ],
    type="messages",
)

iface.launch()