ndwdgda commited on
Commit
d775587
·
verified ·
1 Parent(s): c02992e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -43
app.py CHANGED
@@ -4,47 +4,23 @@
4
  import gradio as gr
5
  from transformers import pipeline
6
  import torch
 
7
  import threading
8
  import time
9
  import tensorflow as tf
 
 
10
 
11
- # Check GPU availability
12
- print(torch.cuda.is_available())
13
- print(tf.test.gpu_device_name())
14
-
15
- # Initialize the text generation pipeline with the specified model
16
- pipe = pipeline("text-generation", model="chargoddard/Yi-34B-Llama", device=0)
17
-
18
- # Rate limiting parameters
19
- rate_limit = 5 # Number of requests per second
20
- last_request_time = 0
21
 
22
- def respond(
23
- message,
24
- history: list[tuple[str, str]],
25
- system_message,
26
- max_tokens,
27
- temperature,
28
- top_p,
29
- ):
30
- global last_request_time
31
 
32
- # Apply rate limiting
33
- elapsed_time = time.time() - last_request_time
34
- if elapsed_time < 1.0 / rate_limit:
35
- time.sleep(1.0 / rate_limit - elapsed_time)
36
 
37
- last_request_time = time.time()
38
 
39
- messages = [{"role": "system", "content": system_message}]
40
 
41
- for val in history:
42
- if val[0]:
43
- messages.append({"role": "user", "content": val[0]})
44
- if val[1]:
45
- messages.append({"role": "assistant", "content": val[1]})
46
 
47
- messages.append({"role": "user", "content": message})
 
48
 
49
  response = ""
50
 
@@ -57,25 +33,19 @@ def respond(
57
  top_p=top_p,
58
  )
59
 
 
60
  response = result[0]['generated_text']
61
  yield response
62
 
63
  # Gradio interface setup
 
 
64
  demo = gr.ChatInterface(
65
  respond,
66
  additional_inputs=[
67
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
68
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
69
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
70
- gr.Slider(
71
- minimum=0.1,
72
- maximum=1.0,
73
- value=0.95,
74
- step=0.05,
75
- label="Top-p (nucleus sampling)",
76
- ),
77
  ],
78
  )
79
 
80
- # Launch the Gradio interface
81
- demo.launch()
 
 
4
  import gradio as gr
5
  from transformers import pipeline
6
  import torch
7
+ import huggingface_hub as infer
8
  import threading
9
  import time
10
  import tensorflow as tf
11
+ import gradio as gr
12
+ from transformers import pipeline
13
 
 
 
 
 
 
 
 
 
 
 
14
 
 
 
 
 
 
 
 
 
 
15
 
 
 
 
 
16
 
 
17
 
 
18
 
19
+ # Initialize the text generation pipeline with the specified model
20
+ pipe = pipeline("text-generation", model="chargoddard/Yi-34B-Llama", device=0)
 
 
 
21
 
22
+ def respond(
23
+ message,
24
 
25
  response = ""
26
 
 
33
  top_p=top_p,
34
  )
35
 
36
+
37
  response = result[0]['generated_text']
38
  yield response
39
 
40
  # Gradio interface setup
41
+
42
+
43
  demo = gr.ChatInterface(
44
  respond,
45
  additional_inputs=[
 
 
 
 
 
 
 
 
 
 
46
  ],
47
  )
48
 
49
+
50
+ if __name__ == "__main__":
51
+ demo.launch()