missbaj committed
Commit a939c7f · verified · 1 Parent(s): eb889cd
Files changed (1):
1. app.py +15 -3
app.py CHANGED
@@ -1,15 +1,26 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
 # Load the smaller model and tokenizer
-model_name = "distilgpt2"
+model_name = "distilgpt2"  # A smaller model that should work with 16GB of RAM
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 
+# Set the device to GPU if available, else use CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
 def generate_response(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt")
-    outputs = model.generate(**inputs, max_length=150, num_return_sequences=1)
+    # Encode the input prompt
+    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
+
+    # Generate the output sequence
+    outputs = model.generate(inputs, max_length=150, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
+
+    # Decode the generated sequence
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
     return response
 
 # Set up Gradio interface
@@ -21,4 +32,5 @@ iface = gr.Interface(
     description="Enter your prompt related to Bitcoin or cryptocurrency."
 )
 
+# Launch the interface
 iface.launch()
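
For reference, a minimal standalone sketch of the generation path this commit lands (the prompt string is illustrative; everything else mirrors app.py). A note on the new pad_token_id argument: distilgpt2 defines no dedicated pad token, so passing pad_token_id=tokenizer.eos_token_id avoids the warning generate() otherwise emits about an unset pad_token_id.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move the model to GPU if one is available, else run on CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Encode an illustrative prompt and place the tensors on the same device
inputs = tokenizer.encode("What is Bitcoin?", return_tensors="pt").to(device)

# distilgpt2 has no pad token, so reuse eos_token_id during generation
outputs = model.generate(inputs, max_length=150, num_return_sequences=1,
                         pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))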