Spaces:

PierreJousselin
/

LAB_LLM

Sleeping

App Files Files Community

PierreJousselin committed on Dec 6, 2024

Commit

f16dac9

verified ·

1 Parent(s): bbd2c3b

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -29

app.py CHANGED Viewed

@@ -1,39 +1,46 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
-# Load the fine-tuned model and tokenizer
-model = AutoModelForCausalLM.from_pretrained("PierreJousselin/lora_model")
-tokenizer = AutoTokenizer.from_pretrained("PierreJousselin/lora_model")
-# Define the text generation function
-def generate_text(prompt):
-    # Encode the input prompt
-    input_ids = tokenizer.encode(prompt, return_tensors="pt")
-    # Generate text using the model
-    generated_ids = model.generate(
-        input_ids,
-        max_length=150,  # Maximum length of the generated text
-        num_return_sequences=1,  # Number of sequences to generate
-        temperature=0.7,  # Sampling temperature (controls randomness)
-        top_p=0.9,  # Nucleus sampling (controls diversity)
-        top_k=50,  # Top-k sampling (limits the number of next word candidates)
-        no_repeat_ngram_size=2,  # Avoid repeating n-grams
-        pad_token_id=tokenizer.eos_token_id
-    )
-    # Decode the generated text
-    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-    return generated_text
-# Create the Gradio interface
 iface = gr.Interface(
-    fn=generate_text,  # The function to call when the user provides input
-    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),  # Input box
-    outputs=gr.Textbox(),  # Output box to display the generated text
-    title="Lora Fine-Tuned Language Model",  # Interface title
-    description="This is a Gradio interface for the Lora fine-tuned language model. Enter a prompt to generate text.",  # Description
 )
-# Launch the interface
-iface.launch()

 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+# Load the base model from the Hugging Face Hub, then attach the adapter weights to it.
+# NOTE: no device is requested by the active code below; only the commented-out variant passes device_map="cpu".
+# model_name = "PierreJousselin/lora_model"  # Replace with the name you used on Hugging Face
+# model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")  # Force model to load on CPU
+model_id = "unsloth/Phi-3.5-mini-instruct"
+peft_model_id = "PierreJousselin/phi"
+model = AutoModelForCausalLM.from_pretrained(model_id)
+model.load_adapter(peft_model_id)
+# The tokenizer is taken from the base model repo (model_id), not from the adapter repo.
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+# Reuse the end-of-sequence token id as the padding token id in the model config
+model.config.pad_token_id = model.config.eos_token_id
+# Function for generating responses using the model
+def generate_response(prompt):
+    """Generate a completion for ``prompt`` with the module-level model.
+
+    Args:
+        prompt: Text entered by the user. It is tokenized and truncated to
+            at most 128 tokens.
+
+    Returns:
+        The decoded output sequence with special tokens stripped, or an
+        empty string when the prompt is empty or whitespace-only.
+    """
+    # Nothing to condition on: skip the (expensive) model call entirely.
+    if not prompt or not prompt.strip():
+        return ""
+    # Tokenize input prompt
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=128)
+    # Ensure the inputs are moved to the CPU
+    input_ids = inputs["input_ids"].to("cpu")
+    # Pass the attention mask explicitly: pad_token_id is aliased to
+    # eos_token_id, so generate() cannot reliably infer it from the ids.
+    attention_mask = inputs["attention_mask"].to("cpu")
+    # max_new_tokens bounds only the generated part. The previous
+    # max_length=150 also counted the prompt, so a 128-token prompt left
+    # room for just 22 generated tokens.
+    output = model.generate(
+        input_ids,
+        attention_mask=attention_mask,
+        max_new_tokens=150,
+        num_return_sequences=1,
+        pad_token_id=tokenizer.eos_token_id,
+    )
+    # Decode and return response
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+# Build the Gradio UI: one prompt text box in, one response text box out
 iface = gr.Interface(
+    fn=generate_response,                     # Function to call for generating response
+    inputs=gr.Textbox(label="Input Prompt"),    # Input type (text box for prompt)
+    outputs=gr.Textbox(label="Generated Response"),  # Output type (text box for response)
+    live=False,                                # Disable live update; only update when button is clicked
+    allow_flagging="never"                     # Prevent flagging (optional, if you don't need it)
 )
+# Launch the interface
+iface.launch(share=True)  # share=True requests a public link for the app