File size: 2,043 Bytes
63559df
dc706d8
cc064b3
63559df
dc706d8
4dbc6e6
 
 
 
81a8df6
2e1a6a0
4dbc6e6
f29169c
f4b9fd8
f29169c
dc706d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d3a5e0
dc706d8
 
 
 
 
 
 
 
 
 
63559df
 
dc706d8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer from the Hugging Face Hub. No device or
# device_map argument is passed to from_pretrained here.

# NOTE(review): model_id is not referenced by any other code visible in this
# file, and it names a different model family than the checkpoint loaded
# below. Confirm whether it can be removed.
model_id = "unsloth/Phi-3.5-mini-instruct"
# Despite the variable name, this repository is loaded directly as a complete
# causal-LM checkpoint; no adapter is attached to a separate base model.
peft_model_id = "eronariodito/Llama_3.2_1B_SFT_The_Tome"

model = AutoModelForCausalLM.from_pretrained(peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# Tokenizer-side padding requires the tokenizer itself to have a pad token;
# setting it only on the model config is not enough. Reuse EOS when the
# checkpoint does not define one so padded tokenizer calls do not raise.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Use the EOS id as the padding id on the model config as well.
model.config.pad_token_id = model.config.eos_token_id

# Function for generating responses using the model
def generate_response(prompt):
    """Generate text from ``prompt`` using the module-level model and tokenizer.

    Args:
        prompt: The text to feed to the model. It is truncated to at most
            128 tokens before generation.

    Returns:
        The first sequence returned by ``model.generate``, decoded with
        special tokens removed. The sequence is decoded in full, so for a
        decoder-only model the returned text is expected to start with the
        prompt itself.
    """
    # A single prompt never needs padding, so none is requested; this also
    # keeps the call working when the tokenizer has no pad token defined.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128)

    # Put the tensors on whatever device the model lives on instead of
    # assuming CPU.
    device = model.device
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)
    print(input_ids)

    # Pass the attention mask explicitly: the pad token id equals the EOS
    # token id here, so generate() cannot reliably infer the mask itself.
    # NOTE: max_length is the total length (prompt + new tokens), so a prompt
    # near the 128-token cap leaves room for only a few generated tokens.
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=150,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the first (and only requested) sequence.
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    print(output)
    return response

# Build the web UI: a single text box for the prompt and a single text box
# for the model's reply.
prompt_box = gr.Textbox(label="Input Prompt")
response_box = gr.Textbox(label="Generated Response")

iface = gr.Interface(
    fn=generate_response,    # called with the prompt text on submit
    inputs=prompt_box,
    outputs=response_box,
    live=False,              # run only on explicit submit, not on every edit
    allow_flagging="never",  # no flagging for this demo
)

# Start the app; share=True requests a public share link from Gradio.
iface.launch(share=True)