# Hugging Face Spaces status when this file was captured: Runtime error
import spaces | |
import gradio as gr | |
from cartesia_pytorch import ReneLMHeadModel | |
from transformers import AutoTokenizer | |
#import subprocess | |
#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True) | |
# Load model and tokenizer once at import time so every request reuses them.
# `.half().cuda()` converts the weights to half precision and moves them to the
# CUDA device, so importing this module requires a GPU to be available.
model = ReneLMHeadModel.from_pretrained("cartesia-ai/Rene-v0.1-1.3b-pytorch").half().cuda()
# The tokenizer is loaded from a different repository than the model weights.
tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-1B-hf")
# Define the function to generate text
def generate_text(input_text: str) -> str:
    """Generate a continuation of ``input_text`` with the module-level model.

    The prompt is tokenized with the module-level ``tokenizer``, the token ids
    are moved to the CUDA device, and ``model.generate`` is called with
    ``max_length=50``, ``top_k=100`` and ``top_p=0.99``. The first (and only)
    sequence in the batch is decoded with special tokens removed.

    Args:
        input_text: Prompt text entered by the user.

    Returns:
        The decoded text of the generated sequence, or an empty string when
        the prompt is empty or ``None``.
    """
    # NOTE(review): `spaces` is imported at the top of the file but `spaces.GPU`
    # is never applied; if this app runs on ZeroGPU hardware, confirm whether
    # this function needs that decorator.

    # An empty prompt may tokenize to zero tokens, leaving the model nothing to
    # condition on; return early instead of letting generation fail.
    if not input_text:
        return ""

    inputs = tokenizer([input_text], return_tensors="pt")
    # NOTE(review): max_length presumably caps the total sequence length
    # (prompt + generated tokens) -- confirm against the model's generate() docs.
    outputs = model.generate(
        inputs.input_ids.cuda(),
        max_length=50,
        top_k=100,
        top_p=0.99,
    )
    # A single prompt was submitted, so only the first decoded string is needed.
    out_message = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return out_message
# Create Gradio interface: a single text input wired to generate_text, with
# the returned string shown in a single text output.
interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="ReneLM Text Generator",
    description="Generate text using ReneLMHeadModel from a prompt.",
)

# Launch the Gradio app (runs as soon as this module is executed).
interface.launch()