Spaces:

sagar007
/

lama_storm_8b

Runtime error

File size: 1,549 Bytes

985eabb
cc1b568
985eabb
cc1b568
 
 
1f7ba92
 
cc1b568
 
1f7ba92
cc1b568
1f7ba92
02a0e92
cc1b568
1f7ba92
02a0e92
1f7ba92
 
02a0e92
1f7ba92
 
fcba473
cc1b568
 
 
1f7ba92
 
 
 
 
02a0e92
 
cc1b568
1f7ba92

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub repository id shared by the tokenizer and the model weights.
model_name = "akjindal53244/Llama-3.1-Storm-8B"

# Tokenizer first, then the causal-LM weights in bfloat16.
# device_map="auto" delegates device placement to the loading library.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)

@spaces.GPU(duration=120)
def generate_text(prompt, max_length, temperature):
    """Generate an assistant reply to ``prompt`` using the module-level model.

    The prompt is wrapped in a two-message chat (a fixed system message plus
    the user's text), rendered through the tokenizer's chat template, and
    completed with sampling (top-k 100, top-p 0.95).

    Args:
        prompt: User text placed in the ``user`` message.
        max_length: Upper bound on the number of *newly generated* tokens
            (forwarded as ``max_new_tokens``). Coerced to ``int``.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        The decoded continuation only: the prompt tokens are sliced off and
        special tokens are skipped during decoding.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )

    # The rendered chat template already carries its own special tokens, so
    # tokenizing it with the default add_special_tokens=True would prepend a
    # second BOS token. Disable that to keep the prompt well-formed.
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        # UI sliders may hand over a float (e.g. 128.0); generate needs an int.
        max_new_tokens=int(max_length),
        do_sample=True,
        temperature=temperature,
        top_k=100,
        top_p=0.95,
    )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_token_count = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

# Input widgets, listed in the positional order of generate_text's parameters.
_prompt_input = gr.Textbox(lines=5, label="Prompt")
_max_length_input = gr.Slider(minimum=1, maximum=500, value=128, step=1, label="Max Length")
_temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")

# Text area that displays the string returned by generate_text.
_generated_output = gr.Textbox(lines=10, label="Generated Text")

iface = gr.Interface(
    fn=generate_text,
    inputs=[_prompt_input, _max_length_input, _temperature_input],
    outputs=_generated_output,
    title="Llama-3.1-Storm-8B Text Generation",
    description="Enter a prompt to generate text using the Llama-3.1-Storm-8B model.",
)

# Start serving the interface.
iface.launch()