fullstack committed on
Commit
9a94757
·
verified ·
1 Parent(s): 6421222

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -24
app.py CHANGED
@@ -1,18 +1,79 @@
1
  import gradio as gr
2
  import requests
3
  import os
 
 
 
 
 
 
 
4
 
5
  # Set up the API endpoint and key
6
- API_URL = os.getenv("BASE_URL")
7
- API_KEY = os.getenv("RUNPOD_API_KEY") # Make sure to set this in your Hugging Face Space secrets
 
8
 
9
  headers = {
10
  "Authorization": f"Bearer {API_KEY}",
11
  "Content-Type": "application/json"
12
  }
13
 
14
- def respond(message, history, system_message, max_tokens, temperature, top_p):
15
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  for human, assistant in history:
18
  messages.append({"role": "user", "content": human})
@@ -21,38 +82,63 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
21
  messages.append({"role": "user", "content": message})
22
 
23
  data = {
24
- "model": "forcemultiplier/fmx-reflective-2b", # Adjust if needed
25
  "messages": messages,
26
  "max_tokens": max_tokens,
27
  "temperature": temperature,
28
- "top_p": top_p
 
 
29
  }
30
 
31
- response = requests.post(API_URL, headers=headers, json=data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- if response.status_code == 200:
34
- return response.json()['choices'][0]['message']['content']
35
- else:
36
- return f"Error: {response.status_code} - {response.text}"
 
 
 
 
37
 
38
  demo = gr.ChatInterface(
39
- respond,
40
  additional_inputs=[
41
- gr.Textbox(
42
- value="You are an advanced artificial intelligence system, capable of <thinking> <reflection> and you output a brief and to-the-point <output>.",
43
- label="System message"
44
- ),
45
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
46
- gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
47
- gr.Slider(
48
- minimum=0.1,
49
- maximum=1.0,
50
- value=0.95,
51
- step=0.05,
52
- label="Top-p (nucleus sampling)",
53
- ),
54
  ],
55
  )
56
 
57
  if __name__ == "__main__":
 
 
 
58
  demo.launch()
 
1
  import gradio as gr
2
  import requests
3
  import os
4
+ import json
5
+ import traceback
6
+ import sys
7
+ import re
8
+
9
+ # Enable or disable tracing
10
+ ENABLE_TRACING = False
11
 
12
  # Set up the API endpoint and key
13
# Endpoint configuration is read from the environment once, at import time.
API_BASE_URL = os.getenv("RUNPOD_API_URL")
API_KEY = os.getenv("RUNPOD_API_KEY")

# Strip trailing slashes so a base of ".../v1/" and ".../v1" both produce a
# single "/" before the path. An unset variable becomes an empty base rather
# than the literal text "None" being embedded in the URL.
API_URL = f"{(API_BASE_URL or '').rstrip('/')}/chat/completions"

# Headers sent with every chat-completions request.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
}
21
 
22
+ import re
23
+
24
def style_xml_content(text):
    """Render model output as HTML with collapsible reasoning sections.

    Every ``<`` and ``>`` in the input is escaped first, so any markup the
    model produces is shown literally. ``<thinking>`` and ``<reflection>``
    sections are then wrapped in an open ``<details>`` element, styled bold
    italic and bold underlined respectively.

    A section whose closing tag is missing (the normal state while a response
    is still being streamed) is styled through to the end of the text, so a
    raw unclosed tag never reaches the HTML output. Tags nested inside a
    section, and stray closing tags, stay escaped and are displayed as text.

    Args:
        text: Raw, possibly partial, model output.

    Returns:
        The styled HTML string with the ``<|im_start|>`` token removed.
    """
    # Opening/closing HTML wrapped around the content of each handled tag.
    wrappers = {
        'thinking': ('<i><b>', '</b></i>'),
        'reflection': ('<u><b>', '</b></u>'),
    }

    def replace_content(match):
        tag = match.group(1)
        content = match.group(2)
        closing = match.group(3)  # empty string when the section is still open
        before, after = wrappers[tag]
        # Only show the closing-tag label once the model has actually emitted it.
        footer = f'<br>&lt;/{tag}&gt;' if closing else ''
        return (
            f'<details open><summary>&lt;{tag}&gt;</summary>'
            f'{before}{content}{after}{footer}</details>'
        )

    # Escape all angle brackets; the handled tags are matched in escaped form.
    escaped = text.replace('<', '&lt;').replace('>', '&gt;')

    # A section ends at its matching closing tag or, failing that, at the end
    # of the text (\Z), which covers partially streamed output.
    styled_text = re.sub(
        r'&lt;(thinking|reflection)&gt;(.*?)(&lt;/\1&gt;|\Z)',
        replace_content,
        escaped,
        flags=re.DOTALL,
    )

    # Remove blacklisted text
    return styled_text.replace("&lt;|im_start|&gt;", "")
53
+
54
+ # Fixed system prompt
55
+ SYSTEM_PROMPT = "You an advanced artificial intelligence system, capable of <thinking> and then creating a length <reflection>, where you ask if you were wrong? And then you correct yourself. Always use <reflection></reflection> unless it is a trivial or wikipedia question. Finally you output a brief and small to the point <output>."
56
+
57
def debug_print(*args, **kwargs):
    """Emit a trace message on stderr, but only while tracing is enabled.

    The arguments are forwarded to ``print`` with ``file=sys.stderr``. When
    ``ENABLE_TRACING`` is false the call does nothing.
    """
    if not ENABLE_TRACING:
        return
    print(*args, file=sys.stderr, **kwargs)
60
+
61
def parse_sse(data):
    """Decode one raw Server-Sent-Events line into its JSON payload.

    Args:
        data: A single line of the response stream as bytes (the values
            yielded by ``response.iter_lines()``).

    Returns:
        The parsed JSON value, or ``None`` when the line is empty, is the
        ``[DONE]`` end-of-stream sentinel, or does not contain valid JSON.
    """
    if not data:
        return None

    line = data.decode('utf-8').strip()
    debug_print(f"Raw SSE data: {line}")

    # The space after "data:" is optional in the SSE format, so accept both
    # "data: {...}" and "data:{...}" instead of only the spaced variant.
    if line.startswith('data:'):
        line = line[len('data:'):].lstrip()

    if line == '[DONE]':
        return None

    try:
        return json.loads(line)
    except json.JSONDecodeError:
        # Comment/keep-alive lines and other non-JSON fields end up here.
        debug_print(f"Failed to parse SSE data: {line}")
        return None
74
+
75
+ def stream_response(message, history, max_tokens, temperature, top_p):
76
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}]
77
 
78
  for human, assistant in history:
79
  messages.append({"role": "user", "content": human})
 
82
  messages.append({"role": "user", "content": message})
83
 
84
  data = {
85
+ "model": "forcemultiplier/fmx-reflective-2b",
86
  "messages": messages,
87
  "max_tokens": max_tokens,
88
  "temperature": temperature,
89
+ "top_p": top_p,
90
+ "stream": True,
91
+ "stop": ["</output>"] # Add stop sequence
92
  }
93
 
94
+ debug_print(f"Sending request to API: {API_URL}")
95
+ debug_print(f"Request data: {json.dumps(data, indent=2)}")
96
+
97
+ try:
98
+ response = requests.post(API_URL, headers=headers, json=data, stream=True)
99
+ debug_print(f"Response status code: {response.status_code}")
100
+ debug_print(f"Response headers: {response.headers}")
101
+
102
+ response.raise_for_status()
103
+
104
+ accumulated_content = ""
105
+ for line in response.iter_lines():
106
+ if line:
107
+ debug_print(f"Received line: {line}")
108
+ parsed = parse_sse(line)
109
+ if parsed:
110
+ debug_print(f"Parsed SSE data: {parsed}")
111
+ if 'choices' in parsed and len(parsed['choices']) > 0:
112
+ content = parsed['choices'][0]['delta'].get('content', '')
113
+ if content:
114
+ accumulated_content += content
115
+ styled_content = style_xml_content(accumulated_content)
116
+ yield styled_content
117
+
118
+ # Check if we've reached the stop sequence
119
+ if accumulated_content.endswith("</output>"):
120
+ break
121
 
122
+ except requests.exceptions.RequestException as e:
123
+ debug_print(f"Request exception: {str(e)}")
124
+ debug_print(f"Request exception traceback: {traceback.format_exc()}")
125
+ yield f"Error: {str(e)}"
126
+ except Exception as e:
127
+ debug_print(f"Unexpected error: {str(e)}")
128
+ debug_print(f"Error traceback: {traceback.format_exc()}")
129
+ yield f"Unexpected error: {str(e)}"
130
 
131
  demo = gr.ChatInterface(
132
+ stream_response,
133
  additional_inputs=[
 
 
 
 
134
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
135
+ gr.Slider(minimum=0.1, maximum=2.0, value=0.4, step=0.1, label="Temperature"),
136
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.83, step=0.05, label="Top-p (nucleus sampling)"),
 
 
 
 
 
 
137
  ],
138
  )
139
 
140
  if __name__ == "__main__":
141
+ debug_print(f"Starting application with API URL: {API_URL}")
142
+ debug_print(f"Using system prompt: {SYSTEM_PROMPT}")
143
+ debug_print(f"Tracing enabled: {ENABLE_TRACING}")
144
  demo.launch()