Spaces:

PierreJousselin
/

LAB_LLM

Sleeping

File size: 2,096 Bytes

fb6252d
7dfe107
f16dac9
7dfe107
f16dac9
7dfe107
f16dac9
 
 
af68d90
 
 
 
f16dac9
 
 
 
 
 
 
 
 
 
 
7dfe107
f16dac9
 
 
 
98744e3
7dfe107
f16dac9
 
 
 
7dfe107
f16dac9
7dfe107
f16dac9
 
 
 
 
fb6252d
 
f16dac9

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Base checkpoint and the PEFT adapter applied on top of it, both referenced
# by their Hugging Face Hub repository ids.
# Earlier configurations, kept for reference:
#   - "PierreJousselin/lora_model" loaded directly with device_map="cpu"
#   - base "unsloth/Phi-3.5-mini-instruct" with adapter "PierreJousselin/phi"
model_id = "unsloth/Llama-3.2-1B-Instruct"
peft_model_id = "PierreJousselin/llama"

# The tokenizer is taken from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the base weights, then attach the adapter on top. No device or
# device_map is requested here, so placement is left to the library default.
model = AutoModelForCausalLM.from_pretrained(model_id)
model.load_adapter(peft_model_id)

# Reuse the end-of-sequence id as the padding id on the model config.
model.config.pad_token_id = model.config.eos_token_id

# Function for generating responses using the model
def generate_response(prompt: str) -> str:
    """Generate text from ``prompt`` with the module-level model and tokenizer.

    The prompt is tokenized (truncated to at most 128 tokens) and handed to
    ``model.generate``. ``max_length=300`` bounds the length of the whole
    output sequence, and the first returned sequence is decoded with special
    tokens stripped.

    Args:
        prompt: Text entered by the user.

    Returns:
        The decoded first output sequence, without special tokens.
    """
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128,
    )

    # Follow the model's own device rather than hard-coding "cpu", so inputs
    # and weights always end up in the same place.
    device = model.device
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)

    # The pad id is set to the EOS id, so generate() cannot reliably tell
    # padding from real tokens by looking at input_ids alone; pass the
    # tokenizer's attention mask explicitly.
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=300,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)

# Gradio UI: one text box in, one text box out, wired to generate_response.
_prompt_box = gr.Textbox(label="Input Prompt")
_response_box = gr.Textbox(label="Generated Response")

iface = gr.Interface(
    fn=generate_response,
    inputs=_prompt_box,
    outputs=_response_box,
    live=False,  # no live updates; runs only when the user submits
    allow_flagging="never",  # flagging is turned off
)

# Start the app; share=True asks Gradio for a public link.
iface.launch(share=True)