Spaces:

fullstack
/

reflection

Sleeping

App Files Files Community

fullstack committed on Sep 8, 2024

Commit

9a94757

verified ·

1 Parent(s): 6421222

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -24

app.py CHANGED Viewed

@@ -1,18 +1,79 @@
 import gradio as gr
 import requests
 import os
 # Set up the API endpoint and key
-API_URL = os.getenv("BASE_URL")
-API_KEY = os.getenv("RUNPOD_API_KEY")  # Make sure to set this in your Hugging Face Space secrets
 headers = {
     "Authorization": f"Bearer {API_KEY}",
     "Content-Type": "application/json"
 }
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    messages = [{"role": "system", "content": system_message}]
     for human, assistant in history:
         messages.append({"role": "user", "content": human})
@@ -21,38 +82,63 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     messages.append({"role": "user", "content": message})
     data = {
-        "model": "forcemultiplier/fmx-reflective-2b",  # Adjust if needed
         "messages": messages,
         "max_tokens": max_tokens,
         "temperature": temperature,
-        "top_p": top_p
     }
-    response = requests.post(API_URL, headers=headers, json=data)
-    if response.status_code == 200:
-        return response.json()['choices'][0]['message']['content']
-    else:
-        return f"Error: {response.status_code} - {response.text}"
 demo = gr.ChatInterface(
-    respond,
     additional_inputs=[
-        gr.Textbox(
-            value="You are an advanced artificial intelligence system, capable of <thinking> <reflection> and you output a brief and to-the-point <output>.",
-            label="System message"
-        ),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
-        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 import requests
 import os
+import json
+import traceback
+import sys
+import re
+# Enable or disable tracing
+ENABLE_TRACING = False
 # Set up the API endpoint and key
+API_BASE_URL = os.getenv("RUNPOD_API_URL")
+API_KEY = os.getenv("RUNPOD_API_KEY")
+API_URL = f"{API_BASE_URL}/chat/completions"
 headers = {
     "Authorization": f"Bearer {API_KEY}",
     "Content-Type": "application/json"
 }
+import re
def style_xml_content(text):
    """Turn raw model output into HTML for display in the chat window.

    All angle brackets in *text* are escaped so that arbitrary tags emitted
    by the model are shown literally. Complete ``<thinking>...</thinking>``
    and ``<reflection>...</reflection>`` sections are then rewritten into
    ``<details open>`` blocks: thinking content is rendered bold italic,
    reflection content bold underlined. Finally the ``<|im_start|>`` marker
    is removed.

    Tags that have been opened but not closed yet (this function is called
    on every partial chunk while streaming) stay escaped, so no unknown raw
    HTML element is ever emitted.

    Args:
        text: The accumulated model output.

    Returns:
        The styled HTML string.
    """
    def replace_content(match):
        tag = match.group(1)
        content = match.group(2)
        if tag == 'thinking':
            styled_content = f'<i><b>{content}</b></i>'
        else:  # the pattern below only admits 'thinking' or 'reflection'
            styled_content = f'<u><b>{content}</b></u>'
        return (
            f'<details open><summary>&lt;{tag}&gt;</summary>'
            f'{styled_content}<br>&lt;/{tag}&gt;</details>'
        )

    # Escape every < and > once; everything below works on the escaped form,
    # so anything that is not a complete, known section remains escaped.
    escaped = text.replace('<', '&lt;').replace('>', '&gt;')
    # Style complete thinking/reflection sections (non-greedy, may span lines).
    styled_text = re.sub(
        r'&lt;(thinking|reflection)&gt;(.*?)&lt;/\1&gt;',
        replace_content,
        escaped,
        flags=re.DOTALL,
    )
    # Remove blacklisted text
    styled_text = styled_text.replace("&lt;|im_start|&gt;", "")
    return styled_text
+# Fixed system prompt
+SYSTEM_PROMPT = "You an advanced artificial intelligence system, capable of <thinking> and then creating a length <reflection>, where you ask if you were wrong? And then you correct yourself. Always use <reflection></reflection> unless it is a trivial or wikipedia question. Finally  you output a brief and small to the point <output>."
def debug_print(*args, **kwargs):
    """Print a trace message to stderr when tracing is switched on.

    Positional and keyword arguments are forwarded to ``print`` unchanged;
    the output stream is always ``sys.stderr``. When ``ENABLE_TRACING`` is
    falsy the call does nothing.
    """
    if not ENABLE_TRACING:
        return
    print(*args, file=sys.stderr, **kwargs)
def parse_sse(data):
    """Decode one line of a server-sent-events stream into a JSON object.

    Args:
        data: One raw line of the stream, as ``bytes`` (decoded as UTF-8)
            or ``str``. Empty or ``None`` input is ignored.

    Returns:
        The parsed JSON object as a ``dict``, or ``None`` when the line is
        empty, is the ``[DONE]`` terminator, is not valid JSON, or is valid
        JSON that is not an object.
    """
    if not data:
        return None
    if isinstance(data, bytes):
        data = data.decode('utf-8')
    data = data.strip()
    debug_print(f"Raw SSE data: {data}")
    # The space after the "data:" field name is optional in the SSE format,
    # so accept both "data: {...}" and "data:{...}".
    if data.startswith('data:'):
        data = data[5:].lstrip()
    if data == '[DONE]':
        return None
    try:
        parsed = json.loads(data)
    except json.JSONDecodeError:
        debug_print(f"Failed to parse SSE data: {data}")
        return None
    # Callers do key lookups on the result; a bare number, string or list
    # is not a usable event payload.
    return parsed if isinstance(parsed, dict) else None
+def stream_response(message, history, max_tokens, temperature, top_p):
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
     for human, assistant in history:
         messages.append({"role": "user", "content": human})
     messages.append({"role": "user", "content": message})
     data = {
+        "model": "forcemultiplier/fmx-reflective-2b",
         "messages": messages,
         "max_tokens": max_tokens,
         "temperature": temperature,
+        "top_p": top_p,
+        "stream": True,
+        "stop": ["</output>"]  # Add stop sequence
     }
+    debug_print(f"Sending request to API: {API_URL}")
+    debug_print(f"Request data: {json.dumps(data, indent=2)}")
+    try:
+        response = requests.post(API_URL, headers=headers, json=data, stream=True)
+        debug_print(f"Response status code: {response.status_code}")
+        debug_print(f"Response headers: {response.headers}")
+        response.raise_for_status()
+        accumulated_content = ""
+        for line in response.iter_lines():
+            if line:
+                debug_print(f"Received line: {line}")
+                parsed = parse_sse(line)
+                if parsed:
+                    debug_print(f"Parsed SSE data: {parsed}")
+                    if 'choices' in parsed and len(parsed['choices']) > 0:
+                        content = parsed['choices'][0]['delta'].get('content', '')
+                        if content:
+                            accumulated_content += content
+                            styled_content = style_xml_content(accumulated_content)
+                            yield styled_content
+                            # Check if we've reached the stop sequence
+                            if accumulated_content.endswith("</output>"):
+                                break
+    except requests.exceptions.RequestException as e:
+        debug_print(f"Request exception: {str(e)}")
+        debug_print(f"Request exception traceback: {traceback.format_exc()}")
+        yield f"Error: {str(e)}"
+    except Exception as e:
+        debug_print(f"Unexpected error: {str(e)}")
+        debug_print(f"Error traceback: {traceback.format_exc()}")
+        yield f"Unexpected error: {str(e)}"
 demo = gr.ChatInterface(
+    stream_response,
     additional_inputs=[
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
+        gr.Slider(minimum=0.1, maximum=2.0, value=0.4, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.83, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )
 if __name__ == "__main__":
+    debug_print(f"Starting application with API URL: {API_URL}")
+    debug_print(f"Using system prompt: {SYSTEM_PROMPT}")
+    debug_print(f"Tracing enabled: {ENABLE_TRACING}")
     demo.launch()