Spaces:

halimbahae
/

CohortBot

Sleeping

App Files Community

halimbahae committed on Dec 23, 2024

Commit

2162f38

verified ·

1 Parent(s): 8789e2d

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -59

app.py CHANGED Viewed

@@ -2,20 +2,14 @@ import gradio as gr
 import pandas as pd
 import re
 from huggingface_hub import InferenceClient
-import spacy
-from collections import Counter
 import plotly.express as px
-import plotly.graph_objects as go
-from datetime import datetime
-# Load SpaCy model for NLP
-nlp = spacy.load("en_core_web_sm")
 # Initialize Hugging Face client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def parse_message(message):
-    """Extract information from a chat message using regex and NLP."""
     info = {}
     # Extract timestamp and phone number
@@ -48,11 +42,6 @@ def parse_message(message):
         thesis_match = re.search(r'[Tt]hesis:?\s*([^•\n]+)', content)
         if thesis_match:
             info['thesis_topic'] = thesis_match.group(1).strip()
-        # Extract LinkedIn URL
-        linkedin_match = re.search(r'https?://(?:www\.)?linkedin\.com\S+', content)
-        if linkedin_match:
-            info['linkedin'] = linkedin_match.group(0)
     return info
@@ -85,7 +74,7 @@ def create_visualizations(df):
     figures = []
     # 1. Affiliation Distribution
-    if 'affiliation' in df.columns:
         affiliation_counts = df['affiliation'].value_counts()
         fig_affiliation = px.pie(
             values=affiliation_counts.values,
@@ -104,8 +93,8 @@ def create_visualizations(df):
             labels={'x': 'Field', 'y': 'Count'}
         )
         figures.append(fig_fields)
-    return figures
 def respond(
     message,
@@ -117,51 +106,52 @@ def respond(
     chat_history_text=""
 ):
     """Enhanced response function with data analysis capabilities."""
-    # Process chat history if provided
-    if chat_history_text:
-        df = create_researcher_df(chat_history_text)
-        # Generate analysis summary
-        summary = f"Analysis of {len(df)} researchers:\n"
-        if 'affiliation' in df.columns:
-            summary += f"- Institutions represented: {df['affiliation'].nunique()}\n"
-        field_counts = analyze_research_fields(df)
-        if not field_counts.empty:
-            top_fields = field_counts.nlargest(3)
-            summary += "- Top research fields:\n"
-            for field, count in top_fields.items():
-                summary += f"  • {field}: {count} researchers\n"
-        # Create visualizations
-        figures = create_visualizations(df)
-        # Add analysis to message
-        message += f"\n\nCommunity Analysis:\n{summary}"
-    # Generate response using the LLM
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for token in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token_content = token.choices[0].delta.content
-        response += token_content
-        yield response
-# Create enhanced Gradio interface
 demo = gr.Interface(
     fn=respond,
     inputs=[
@@ -177,8 +167,8 @@ demo = gr.Interface(
         gr.Textbox(label="Response"),
         gr.Plot(label="Community Analysis")
     ],
-    title="Research Community Analyzer",
-    description="An enhanced chatbot that analyzes research community data and provides visualizations."
 )
 if __name__ == "__main__":

 import pandas as pd
 import re
 from huggingface_hub import InferenceClient
 import plotly.express as px
+from collections import Counter
 # Initialize Hugging Face client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def parse_message(message):
+    """Extract information from a chat message using regex."""
     info = {}
     # Extract timestamp and phone number
         thesis_match = re.search(r'[Tt]hesis:?\s*([^•\n]+)', content)
         if thesis_match:
             info['thesis_topic'] = thesis_match.group(1).strip()
     return info
     figures = []
     # 1. Affiliation Distribution
+    if 'affiliation' in df.columns and not df['affiliation'].empty:
         affiliation_counts = df['affiliation'].value_counts()
         fig_affiliation = px.pie(
             values=affiliation_counts.values,
             labels={'x': 'Field', 'y': 'Count'}
         )
         figures.append(fig_fields)
+    return figures[0] if figures else None
 def respond(
     message,
     chat_history_text=""
 ):
     """Enhanced response function with data analysis capabilities."""
+    try:
+        # Process chat history if provided
+        if chat_history_text:
+            df = create_researcher_df(chat_history_text)
+            # Generate analysis summary
+            summary = f"Analysis of {len(df)} researchers:\n"
+            if 'affiliation' in df.columns:
+                summary += f"- Institutions represented: {df['affiliation'].nunique()}\n"
+            field_counts = analyze_research_fields(df)
+            if not field_counts.empty:
+                top_fields = field_counts.nlargest(3)
+                summary += "- Top research fields:\n"
+                for field, count in top_fields.items():
+                    summary += f"  • {field}: {count} researchers\n"
+            # Add analysis to message
+            message += f"\n\nCommunity Analysis:\n{summary}"
+        # Generate response using the LLM
+        messages = [{"role": "system", "content": system_message}]
+        for val in history:
+            if val[0]:
+                messages.append({"role": "user", "content": val[0]})
+            if val[1]:
+                messages.append({"role": "assistant", "content": val[1]})
+        messages.append({"role": "user", "content": message})
+        response = ""
+        for token in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token_content = token.choices[0].delta.content
+            response += token_content
+            yield response
+    except Exception as e:
+        yield f"Error: {str(e)}"
+# Create Gradio interface
 demo = gr.Interface(
     fn=respond,
     inputs=[
         gr.Textbox(label="Response"),
         gr.Plot(label="Community Analysis")
     ],
+    title="CohortBot",
+    description="A chatbot that analyzes research community data and provides visualizations."
 )
 if __name__ == "__main__":