charanhu committed on
Commit f55178e · 1 Parent(s): 5f5a729

Update app.py

Files changed (1)
  1. app.py +22 -36
app.py CHANGED
@@ -1,47 +1,33 @@
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
-from threading import Thread
 
-# Load model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T")
 model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T")
 
-class StopOnTokens(StoppingCriteria):
-    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
-        stop_ids = [29, 0]
-        for stop_id in stop_ids:
-            if input_ids[0][-1] == stop_id:
-                return True
-        return False
-
-def predict(message, history):
-
-    history_transformer_format = history + [[message, ""]]
-    stop = StopOnTokens()
-
-    messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]]) #curr_system_message +
-                        for item in history_transformer_format])
-
-    model_inputs = tokenizer([messages], return_tensors="pt")
-    temperature = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, label="Temperature"),
-    max_new_tokens = gr.Slider(minimum=0, maximum=2048, value=10, label="Temperature"),
-    min_new_tokens = gr.Slider(minimum=0, maximum=2048, value=1, label="Temperature"),
-    generate_kwargs = dict(
-        model_inputs,
-        max_new_tokens=int(max_new_tokens.value),
-        min_new_tokens=int(min_new_tokens.value),
-        do_sample=True,
-        top_p=1,
-        top_k=50,
-        temperature=float(temperature.value),
-        num_beams=1,
-        stopping_criteria=StoppingCriteriaList([stop])
-    )
-    generated_sequence = model.generate(**generate_kwargs)[0]
-    generated_text = tokenizer.decode(generated_sequence, skip_special_tokens=True)
-
-    yield generated_text
-
-
-gr.ChatInterface(predict).queue().launch()
+def generate_text(prompt, temperature, max_length, min_length):
+    # Tokenize the prompt
+    input_ids = tokenizer.encode(prompt, return_tensors="pt")
+
+    # Generate text using the model
+    output = model.generate(input_ids, max_length=max_length, min_length=min_length, temperature=temperature, num_return_sequences=1)
+
+    # Decode the generated output
+    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+    return generated_text
+
+def chatbot_app(prompt, temperature, max_length, min_length):
+    generated_text = generate_text(prompt, temperature, max_length, min_length)
+    return generated_text
+
+iface = gr.Interface(
+    fn=chatbot_app,
+    inputs=["text", gr.Number(minimum=0.1, maximum=2.0, value=1.0, label="Temperature"),
+            gr.Number(minimum=10, maximum=2048, value=10, label="Max Length"),
+            gr.Number(minimum=1, maximum=2048, value=1, label="Min Length")],
+    outputs="text",
+    live=False,
+)
+
+iface.launch()
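
The committed generate_text forwards the raw widget values straight into model.generate. Two details worth flagging: gr.Number components deliver floats, and transformers ignores temperature unless sampling is enabled. Below is a minimal sketch of the function with those two adjustments, assuming the same TinyLlama checkpoint as above; it illustrates the caveat and is not part of this commit.

from transformers import AutoTokenizer, AutoModelForCausalLM

# Sketch only (not part of the commit): same checkpoint, with two assumed
# fixes: cast the gr.Number floats to ints for the length limits, and
# enable sampling so the temperature setting actually affects decoding.
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T")

def generate_text(prompt, temperature, max_length, min_length):
    # Tokenize the prompt into a batch of one sequence
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    output = model.generate(
        input_ids,
        max_length=int(max_length),    # gr.Number values arrive as floats
        min_length=int(min_length),
        do_sample=True,                # required for temperature to take effect
        temperature=float(temperature),
        num_return_sequences=1,
    )
    # Decode the single generated sequence back to text
    return tokenizer.decode(output[0], skip_special_tokens=True)

Called directly, e.g. generate_text("The capital of France is", 0.7, 50.0, 1.0), the sketch returns the prompt plus a sampled continuation, which is the behaviour chatbot_app relays to the Interface.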