Spaces:
Sleeping
Sleeping
File size: 2,096 Bytes
fb6252d 7dfe107 f16dac9 7dfe107 f16dac9 7dfe107 f16dac9 af68d90 f16dac9 7dfe107 f16dac9 98744e3 7dfe107 f16dac9 7dfe107 f16dac9 7dfe107 f16dac9 fb6252d f16dac9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Base checkpoint and the LoRA adapter that is attached on top of it.
# No device_map is passed, so the weights stay on the default device.
# (Earlier experiment: "unsloth/Phi-3.5-mini-instruct" + "PierreJousselin/phi".)
model_id = "unsloth/Llama-3.2-1B-Instruct"
peft_model_id = "PierreJousselin/llama"

model = AutoModelForCausalLM.from_pretrained(model_id)
model.load_adapter(peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Reuse EOS as the padding id so generation never runs without one.
# A config may store eos_token_id as a list of ids, whereas pad_token_id
# must be a single id, so collapse a list to its first entry.
_eos_token_id = model.config.eos_token_id
if isinstance(_eos_token_id, (list, tuple)):
    _eos_token_id = _eos_token_id[0] if _eos_token_id else None
model.config.pad_token_id = _eos_token_id
def generate_response(prompt: str) -> str:
    """Generate a completion for *prompt* with the module-level model.

    The prompt is tokenized (truncated to 128 tokens), passed to
    ``model.generate`` with a total length cap of 300 tokens, and the first
    returned sequence is decoded with special tokens skipped.

    Args:
        prompt: Text typed by the user.

    Returns:
        The decoded text of the first generated sequence. For a causal LM
        ``generate`` normally returns the prompt tokens followed by the new
        tokens, so the result is expected to start with the prompt.
        An empty string is returned for a blank or missing prompt.
    """
    # Nothing to generate from: avoid running the model on an empty input.
    if prompt is None or not prompt.strip():
        return ""

    # A single prompt needs no padding, so padding is not requested; asking
    # for it would fail on tokenizers that have no pad token configured.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128)

    # Keep the tensors on the same device as the model weights.
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)
    print(input_ids)

    # Pass the attention mask explicitly: the pad id is the EOS id here, so
    # generate() cannot reliably infer the mask from the input ids alone.
    output = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=300,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    response = tokenizer.decode(output[0], skip_special_tokens=True)
    print(output)
    return response
# UI components: one text box for the prompt, one for the model's answer.
_prompt_box = gr.Textbox(label="Input Prompt")
_response_box = gr.Textbox(label="Generated Response")

# Wire the components to generate_response. With live=False the function
# runs only when the user submits, not on every keystroke; flagging is off.
iface = gr.Interface(
    fn=generate_response,
    inputs=_prompt_box,
    outputs=_response_box,
    live=False,
    allow_flagging="never",
)

# Start the app; share=True requests a public link.
iface.launch(share=True)