PierreJousselin committed (verified)
Commit f16dac9 · 1 Parent(s): bbd2c3b

Update app.py

Files changed (1): app.py (+36 −29)
app.py CHANGED
@@ -1,39 +1,46 @@
  import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer

- # Load the fine-tuned model and tokenizer
- model = AutoModelForCausalLM.from_pretrained("PierreJousselin/lora_model")
- tokenizer = AutoTokenizer.from_pretrained("PierreJousselin/lora_model")

- # Define the text generation function
- def generate_text(prompt):
-     # Encode the input prompt
-     input_ids = tokenizer.encode(prompt, return_tensors="pt")

-     # Generate text using the model
-     generated_ids = model.generate(
-         input_ids,
-         max_length=150,  # Maximum length of the generated text
-         num_return_sequences=1,  # Number of sequences to generate
-         temperature=0.7,  # Sampling temperature (controls randomness)
-         top_p=0.9,  # Nucleus sampling (controls diversity)
-         top_k=50,  # Top-k sampling (limits the number of next-word candidates)
-         no_repeat_ngram_size=2,  # Avoid repeating n-grams
-         pad_token_id=tokenizer.eos_token_id
-     )

-     # Decode the generated text
-     generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-     return generated_text

- # Create the Gradio interface
  iface = gr.Interface(
-     fn=generate_text,  # The function to call when the user provides input
-     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),  # Input box
-     outputs=gr.Textbox(),  # Output box to display the generated text
-     title="Lora Fine-Tuned Language Model",  # Interface title
-     description="This is a Gradio interface for the Lora fine-tuned language model. Enter a prompt to generate text.",  # Description
  )

- # Launch the interface
- iface.launch()
 
  import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch

+ # Load your model and tokenizer from the Hugging Face Hub (forcing CPU usage)

+ # model_name = "PierreJousselin/lora_model"  # Replace with the name you used on Hugging Face
+ # model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")  # Force the model to load on CPU
+
+ model_id = "unsloth/Phi-3.5-mini-instruct"
+ peft_model_id = "PierreJousselin/phi"
+
+ model = AutoModelForCausalLM.from_pretrained(model_id)
+ model.load_adapter(peft_model_id)
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+ # Ensure pad_token_id is set to eos_token_id to avoid generation errors
+ model.config.pad_token_id = model.config.eos_token_id
+
+ # Function for generating responses with the model
+ def generate_response(prompt):
+     # Tokenize the input prompt
+     inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=128)

+     # Ensure the inputs are on the CPU
+     input_ids = inputs["input_ids"].to("cpu")
+     print(input_ids)
+     # Generate output (on CPU)
+     output = model.generate(input_ids, max_length=150, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)

+     # Decode and return the response
+     response = tokenizer.decode(output[0], skip_special_tokens=True)
+     print(output)
+     return response

+ # Create the Gradio interface
  iface = gr.Interface(
+     fn=generate_response,  # Function to call for generating the response
+     inputs=gr.Textbox(label="Input Prompt"),  # Input text box for the prompt
+     outputs=gr.Textbox(label="Generated Response"),  # Output text box for the response
+     live=False,  # Disable live updates; only run when the button is clicked
+     allow_flagging="never"  # Disable flagging (optional)
  )

+ # Launch the interface
+ iface.launch(share=True)  # share=True exposes a public link
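
Note on the change: the new app.py no longer loads "PierreJousselin/lora_model" as a standalone checkpoint; it loads the unsloth/Phi-3.5-mini-instruct base model and attaches the LoRA weights from PierreJousselin/phi via model.load_adapter(), which relies on transformers' PEFT integration (the peft package must be installed in the Space). As a minimal sketch, and purely as an assumption about an equivalent pattern rather than what this commit does, the same adapter could also be attached with peft.PeftModel.from_pretrained; the example prompt below is hypothetical.

# Sketch only (assumption): attach the same LoRA adapter via peft.PeftModel
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "unsloth/Phi-3.5-mini-instruct"  # base model id used in this commit
adapter_id = "PierreJousselin/phi"         # LoRA adapter id used in this commit

base = AutoModelForCausalLM.from_pretrained(base_id)
model = PeftModel.from_pretrained(base, adapter_id)  # wrap the base model with the adapter weights
tokenizer = AutoTokenizer.from_pretrained(base_id)

inputs = tokenizer("Hello, how are you?", return_tensors="pt")  # hypothetical example prompt
output = model.generate(**inputs, max_length=150, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(output[0], skip_special_tokens=True))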