Spaces:
Runtime error
Runtime error
Luciferalive
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,12 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
|
3 |
|
4 |
# Load the model and tokenizer
|
5 |
model_name = "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0"
|
6 |
-
|
|
|
|
|
7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
8 |
|
9 |
def generate_prompt(instruction, user_input):
|
@@ -26,8 +29,9 @@ def get_model_response(user_input, instruction="Identify and summarize the core
|
|
26 |
"""
|
27 |
input_text = generate_prompt(instruction, user_input)
|
28 |
inputs = tokenizer([input_text], return_tensors="pt")
|
29 |
-
|
30 |
-
|
|
|
31 |
return response.split("### Response:")[-1].strip()
|
32 |
|
33 |
# Gradio interface
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
3 |
+
import accelerate
|
4 |
|
5 |
# Load the model and tokenizer
|
6 |
model_name = "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0"
|
7 |
+
accelerator = accelerate.Accelerator()
|
8 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=False, device_map="auto", offload_folder="/tmp")
|
9 |
+
model = accelerator.prepare(model)
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
11 |
|
12 |
def generate_prompt(instruction, user_input):
|
|
|
29 |
"""
|
30 |
input_text = generate_prompt(instruction, user_input)
|
31 |
inputs = tokenizer([input_text], return_tensors="pt")
|
32 |
+
with accelerator.distribute_inputs_to_prepared(model.device_map, inputs):
|
33 |
+
outputs = model.generate(**inputs, max_new_tokens=300, use_cache=True)
|
34 |
+
response = tokenizer.batch_decode(accelerator.gather(outputs))[0]
|
35 |
return response.split("### Response:")[-1].strip()
|
36 |
|
37 |
# Gradio interface
|