Spestly committed on
Commit
7b260fb
·
verified ·
1 Parent(s): f941c00

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
# Checkpoint identifier shared by the tokenizer and the model weights.
model_name = "Spestly/Athena-2-1.5B"

# Tokenizer that matches the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Causal-LM weights loaded as float32, with the low-CPU-memory loading path
# enabled.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float32,
)

# The app only runs inference, so put the module in evaluation mode.
model.eval()
12
+
13
def generate_response(message, history):
    """Generate Athena's reply to a single user message.

    The message is wrapped in an instruction-style prompt, sampled from the
    module-level ``model``, and only the newly generated text is returned.

    Args:
        message: The user's latest chat message (plain text).
        history: Prior conversation as supplied by ``gr.ChatInterface``.
            It is accepted for interface compatibility but is not used:
            every reply is generated from ``message`` alone.

    Returns:
        The model's completion as a string, with special tokens removed and
        surrounding whitespace stripped.
    """
    instruction = (
        "You are an LLM called Athena. Aayan Mishra finetunes you. Anthropic does NOT train you. "
        "You are a Qwen 2.5 fine-tune. Your purpose is the help the user accomplish their request to the best of your abilities. "
        "Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
        f"### Instruction:\n{message}\n\n### Response:"
    )

    inputs = tokenizer(instruction, return_tensors="pt")
    # Number of prompt tokens; generate() returns the prompt followed by the
    # completion for causal LMs, so this is where the new text begins.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1000,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    # Decode only the generated continuation. Splitting the full decoded text
    # on "### Response:" would drop the real answer whenever the model emits
    # that marker itself (e.g. when it hallucinates another turn).
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return response.strip()
37
+
38
# Chat UI backed by generate_response. Both the ChatInterface and its Chatbot
# use the "messages" history format.
iface = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(height=600, type="messages"),
    textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
    # Title now names Athena-2, matching the loaded checkpoint and the
    # description below (it previously read "Athena-1").
    title="Athena-2 - Beta",
    description="Chat with Athena-2 (Beta)",
    theme="monochrome",
    examples=[
        "What is Pagani and what are they known for?",
        "Make a small Python Neural Network.",
        "What is the capital of Canada?",
    ],
    type="messages",
)

# Start the Gradio server.
iface.launch()