halimbahae committed
Commit 2162f38 · verified · 1 Parent(s): 8789e2d

Update app.py

Files changed (1)
  1. app.py +49 -59
app.py CHANGED
@@ -2,20 +2,14 @@ import gradio as gr
 import pandas as pd
 import re
 from huggingface_hub import InferenceClient
-import spacy
-from collections import Counter
 import plotly.express as px
-import plotly.graph_objects as go
-from datetime import datetime
-
-# Load SpaCy model for NLP
-nlp = spacy.load("en_core_web_sm")
+from collections import Counter
 
 # Initialize Hugging Face client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 def parse_message(message):
-    """Extract information from a chat message using regex and NLP."""
+    """Extract information from a chat message using regex."""
     info = {}
 
     # Extract timestamp and phone number
@@ -48,11 +42,6 @@ def parse_message(message):
     thesis_match = re.search(r'[Tt]hesis:?\s*([^•\n]+)', content)
     if thesis_match:
         info['thesis_topic'] = thesis_match.group(1).strip()
-
-    # Extract LinkedIn URL
-    linkedin_match = re.search(r'https?://(?:www\.)?linkedin\.com\S+', content)
-    if linkedin_match:
-        info['linkedin'] = linkedin_match.group(0)
 
     return info
 
@@ -85,7 +74,7 @@ def create_visualizations(df):
     figures = []
 
     # 1. Affiliation Distribution
-    if 'affiliation' in df.columns:
+    if 'affiliation' in df.columns and not df['affiliation'].empty:
         affiliation_counts = df['affiliation'].value_counts()
         fig_affiliation = px.pie(
             values=affiliation_counts.values,
@@ -104,8 +93,8 @@ def create_visualizations(df):
             labels={'x': 'Field', 'y': 'Count'}
         )
         figures.append(fig_fields)
-
-    return figures
+
+    return figures[0] if figures else None
 
 def respond(
     message,
@@ -117,51 +106,52 @@
     chat_history_text=""
 ):
     """Enhanced response function with data analysis capabilities."""
-    # Process chat history if provided
-    if chat_history_text:
-        df = create_researcher_df(chat_history_text)
-
-        # Generate analysis summary
-        summary = f"Analysis of {len(df)} researchers:\n"
-        if 'affiliation' in df.columns:
-            summary += f"- Institutions represented: {df['affiliation'].nunique()}\n"
+    try:
+        # Process chat history if provided
+        if chat_history_text:
+            df = create_researcher_df(chat_history_text)
+
+            # Generate analysis summary
+            summary = f"Analysis of {len(df)} researchers:\n"
+            if 'affiliation' in df.columns:
+                summary += f"- Institutions represented: {df['affiliation'].nunique()}\n"
+
+            field_counts = analyze_research_fields(df)
+            if not field_counts.empty:
+                top_fields = field_counts.nlargest(3)
+                summary += "- Top research fields:\n"
+                for field, count in top_fields.items():
+                    summary += f" • {field}: {count} researchers\n"
+
+            # Add analysis to message
+            message += f"\n\nCommunity Analysis:\n{summary}"
 
-        field_counts = analyze_research_fields(df)
-        if not field_counts.empty:
-            top_fields = field_counts.nlargest(3)
-            summary += "- Top research fields:\n"
-            for field, count in top_fields.items():
-                summary += f" • {field}: {count} researchers\n"
+        # Generate response using the LLM
+        messages = [{"role": "system", "content": system_message}]
+        for val in history:
+            if val[0]:
+                messages.append({"role": "user", "content": val[0]})
+            if val[1]:
+                messages.append({"role": "assistant", "content": val[1]})
 
-        # Create visualizations
-        figures = create_visualizations(df)
+        messages.append({"role": "user", "content": message})
 
-        # Add analysis to message
-        message += f"\n\nCommunity Analysis:\n{summary}"
-
-    # Generate response using the LLM
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-    for token in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token_content = token.choices[0].delta.content
-        response += token_content
-        yield response
+        response = ""
+        for token in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token_content = token.choices[0].delta.content
+            response += token_content
+            yield response
+
+    except Exception as e:
+        yield f"Error: {str(e)}"
 
-# Create enhanced Gradio interface
+# Create Gradio interface
 demo = gr.Interface(
     fn=respond,
     inputs=[
@@ -177,8 +167,8 @@ demo = gr.Interface(
         gr.Textbox(label="Response"),
         gr.Plot(label="Community Analysis")
     ],
-    title="Research Community Analyzer",
-    description="An enhanced chatbot that analyzes research community data and provides visualizations."
+    title="CohortBot",
+    description="A chatbot that analyzes research community data and provides visualizations."
 )
 
 if __name__ == "__main__":
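The changed respond() body still calls two helpers that are defined elsewhere in app.py and do not appear in this diff: create_researcher_df() and analyze_research_fields(). Purely as a hedged illustration of the shapes the new code assumes (a DataFrame with an 'affiliation' column, and a pandas Series that supports .empty and .nlargest(3)), a minimal sketch could look like the following; it is not the repository's actual implementation, and the 'fields' column name is a hypothetical placeholder.

# Illustrative sketch only -- inferred from the call sites in the diff, not taken from app.py.
import pandas as pd
from collections import Counter

def create_researcher_df(chat_history_text):
    """Parse each non-empty chat line with parse_message() (defined in app.py) into a DataFrame."""
    records = [parse_message(line) for line in chat_history_text.splitlines() if line.strip()]
    return pd.DataFrame([r for r in records if r])

def analyze_research_fields(df):
    """Return a Series of research-field counts; assumes a hypothetical 'fields' column."""
    counter = Counter()
    for value in df.get('fields', pd.Series(dtype=object)).dropna():
        counter.update(value if isinstance(value, list) else [value])
    return pd.Series(counter).sort_values(ascending=False)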
 
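The switch from return figures to return figures[0] if figures else None lines up with the interface declaring a single gr.Plot output, which renders one Plotly figure (or stays empty when given None) rather than a list. A self-contained toy example of that output contract, using dummy data and names that are not part of app.py:

# Toy illustration of the gr.Plot output contract; dummy data, not the app's own respond().
import gradio as gr
import plotly.express as px

def toy_fn(show_plot):
    # Returning None for the plot output leaves it empty, mirroring
    # 'return figures[0] if figures else None' in the updated code.
    fig = px.pie(values=[3, 2, 1], names=["Group A", "Group B", "Other"]) if show_plot else None
    return "done", fig

toy_demo = gr.Interface(
    fn=toy_fn,
    inputs=gr.Checkbox(label="Show plot"),
    outputs=[gr.Textbox(label="Response"), gr.Plot(label="Community Analysis")],
)

if __name__ == "__main__":
    toy_demo.launch()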