David committed on
Commit
7e9b0bd
·
1 Parent(s): be52360
Files changed (1) hide show
  1. app.py +26 -61
app.py CHANGED
@@ -2,8 +2,7 @@ import gradio as gr
2
  import requests
3
  import os
4
  import json
5
- import re
6
- import html
7
 
8
  # Set up the API endpoint and key
9
  API_URL = os.getenv("RUNPOD_API_URL")
@@ -14,34 +13,11 @@ headers = {
14
  "Content-Type": "application/json"
15
  }
16
 
17
- def escape_html(text):
18
- return html.escape(text)
19
 
20
- def format_response_for_display(text):
21
- # Escape HTML entities
22
- text = escape_html(text)
23
-
24
- # Format <thinking> and <reflection> tags (case-insensitive)
25
- text = re.sub(r'(?i)(<thinking>)(.*?)(</thinking>)',
26
- r'<span style="font-family: monospace; color: blue;">\1</span><i>\2</i><span style="font-family: monospace; color: blue;">\3</span>',
27
- text, flags=re.DOTALL)
28
- text = re.sub(r'(?i)(<reflection>)(.*?)(</reflection>)',
29
- r'<span style="font-family: monospace; color: green;">\1</span><i>\2</i><span style="font-family: monospace; color: green;">\3</span>',
30
- text, flags=re.DOTALL)
31
-
32
- # Remove <output> tags but keep content
33
- text = re.sub(r'(?i)<output>(.*?)</output>', r'\1', text, flags=re.DOTALL)
34
-
35
- # Replace newlines with <br> tags
36
- text = text.replace('\n', '<br>')
37
-
38
- return text
39
-
40
- def respond(message, history, system_message, max_tokens, temperature, top_p):
41
- if system_message is not None:
42
- messages = [{"role": "system", "content": system_message}]
43
- else:
44
- messages = []
45
 
46
  for human, assistant in history:
47
  messages.append({"role": "user", "content": human})
@@ -54,55 +30,44 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
54
  "messages": messages,
55
  "max_tokens": max_tokens,
56
  "temperature": temperature,
57
- "top_p": top_p
 
58
  }
59
 
60
  try:
61
- print(f"Sending request to API: {API_URL}")
62
- print(f"Request data: {json.dumps(data, indent=2)}")
63
-
64
- response = requests.post(API_URL, headers=headers, json=data)
65
-
66
- print(f"Raw API Response: {response.text}")
67
- print(f"Response Status Code: {response.status_code}")
68
-
69
  response.raise_for_status()
 
70
 
71
- response_json = response.json()
72
- print(f"Formatted API Response: {json.dumps(response_json, indent=2)}")
 
 
 
 
 
 
 
 
 
 
 
73
 
74
- if 'choices' in response_json and len(response_json['choices']) > 0:
75
- content = response_json['choices'][0]['message']['content']
76
- formatted_content = format_response_for_display(content)
77
- print(f"Formatted content for display: {formatted_content}") # For debugging
78
- return formatted_content
79
- else:
80
- return f"Error: Unexpected response format. Full response: {response_json}"
81
-
82
  except requests.exceptions.RequestException as e:
83
- print(f"Request Exception: {str(e)}")
84
- return f"Error: {str(e)}"
85
- except ValueError as e:
86
- print(f"JSON Parsing Error: {str(e)}")
87
- return f"Error: Invalid JSON response. {str(e)}"
88
- except KeyError as e:
89
- print(f"Key Error: {str(e)}")
90
- return f"Error: Unexpected response structure. Missing key: {str(e)}"
91
  except Exception as e:
92
- print(f"Unexpected Error: {str(e)}")
93
- return f"Unexpected error: {str(e)}"
94
 
95
  demo = gr.ChatInterface(
96
- respond,
97
  additional_inputs=[
98
- gr.Textbox(label="System message"),
99
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
100
  gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
101
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
102
  ],
103
- css=".message-wrap { white-space: pre-wrap; }"
104
  )
105
 
106
  if __name__ == "__main__":
107
  print(f"Starting application with API URL: {API_URL}")
 
108
  demo.launch()
 
2
  import requests
3
  import os
4
  import json
5
+ import sseclient
 
6
 
7
  # Set up the API endpoint and key
8
  API_URL = os.getenv("RUNPOD_API_URL")
 
13
  "Content-Type": "application/json"
14
  }
15
 
16
+ # Fixed system prompt
17
+ SYSTEM_PROMPT = "You an advanced artificial intelligence system, capable of <thinking> <reflection> and you output a brief and small to the point <output>."
18
 
19
+ def stream_response(message, history, max_tokens, temperature, top_p):
20
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  for human, assistant in history:
23
  messages.append({"role": "user", "content": human})
 
30
  "messages": messages,
31
  "max_tokens": max_tokens,
32
  "temperature": temperature,
33
+ "top_p": top_p,
34
+ "stream": True
35
  }
36
 
37
  try:
38
+ response = requests.post(API_URL, headers=headers, json=data, stream=True)
 
 
 
 
 
 
 
39
  response.raise_for_status()
40
+ client = sseclient.SSEClient(response)
41
 
42
+ full_response = ""
43
+ for event in client.events():
44
+ if event.data != "[DONE]":
45
+ try:
46
+ chunk = json.loads(event.data)
47
+ if 'choices' in chunk and len(chunk['choices']) > 0:
48
+ content = chunk['choices'][0]['delta'].get('content', '')
49
+ full_response += content
50
+ # Replace < and > with their HTML entities
51
+ display_content = content.replace('<', '&lt;').replace('>', '&gt;')
52
+ yield display_content
53
+ except json.JSONDecodeError:
54
+ print(f"Failed to decode JSON: {event.data}")
55
 
 
 
 
 
 
 
 
 
56
  except requests.exceptions.RequestException as e:
57
+ yield f"Error: {str(e)}"
 
 
 
 
 
 
 
58
  except Exception as e:
59
+ yield f"Unexpected error: {str(e)}"
 
60
 
61
  demo = gr.ChatInterface(
62
+ stream_response,
63
  additional_inputs=[
 
64
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
65
  gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
66
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
67
  ],
 
68
  )
69
 
70
  if __name__ == "__main__":
71
  print(f"Starting application with API URL: {API_URL}")
72
+ print(f"Using system prompt: {SYSTEM_PROMPT}")
73
  demo.launch()