geekyrakshit committed
Commit c0fd1af · 1 Parent(s): 41151eb

update: evaluation app

Files changed (1)
  1. application_pages/evaluation_app.py (+40 −1)
application_pages/evaluation_app.py CHANGED
@@ -1,3 +1,4 @@
+import asyncio
 from importlib import import_module
 
 import pandas as pd
@@ -7,6 +8,7 @@ from dotenv import load_dotenv
 
 from guardrails_genie.guardrails import GuardrailManager
 from guardrails_genie.llm import OpenAIModel
+from guardrails_genie.metrics import AccuracyMetric
 
 
 def initialize_session_state():
@@ -31,6 +33,8 @@ def initialize_session_state():
         st.session_state.guardrail_names = []
     if "start_evaluations_button" not in st.session_state:
         st.session_state.start_evaluations_button = False
+    if "evaluation_name" not in st.session_state:
+        st.session_state.evaluation_name = ""
 
 
 def initialize_guardrails():
@@ -185,8 +189,43 @@ if st.session_state.uploaded_file is not None:
     st.session_state.guardrail_names = guardrail_names
 
     initialize_guardrails()
+    evaluation_name = st.sidebar.text_input("Evaluation Name", value="")
+    st.session_state.evaluation_name = evaluation_name
 
     start_evaluations_button = st.sidebar.button("Start Evaluations")
     st.session_state.start_evaluations_button = start_evaluations_button
     if st.session_state.start_evaluations_button:
-        st.write(len(st.session_state.guardrails))
+        # st.write(len(st.session_state.guardrails))
+        evaluation = weave.Evaluation(
+            dataset=st.session_state.dataset_ref,
+            scorers=[AccuracyMetric()],
+            streamlit_mode=True,
+        )
+        with st.expander("Evaluation Results", expanded=True):
+            evaluation_summary, call = asyncio.run(
+                evaluation.evaluate.call(
+                    evaluation,
+                    GuardrailManager(guardrails=st.session_state.guardrails),
+                    __weave={
+                        "display_name": (
+                            "Evaluation.evaluate"
+                            if st.session_state.evaluation_name == ""
+                            else "Evaluation.evaluate:"
+                            + st.session_state.evaluation_name
+                        )
+                    },
+                )
+            )
+            x_axis = list(evaluation_summary["AccuracyMetric"].keys())
+            y_axis = [
+                evaluation_summary["AccuracyMetric"][x_axis_item]
+                for x_axis_item in x_axis
+            ]
+            st.bar_chart(
+                pd.DataFrame({"Metric": x_axis, "Score": y_axis}),
+                x="Metric",
+                y="Score",
+            )
+            st.markdown(
+                f"Explore the entire evaluation trace table in [Weave]({call.ui_url})"
+            )
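
For context on the new import: AccuracyMetric is used above as a Weave scorer, and the bar chart reads its aggregate output through evaluation_summary["AccuracyMetric"], i.e. one bar per key returned by the scorer's summarize step. A minimal sketch of such a scorer follows, assuming a binary label column in the uploaded dataset and a "safe" flag in the GuardrailManager output; both names are assumptions for illustration, and the real class lives in guardrails_genie/metrics.py:

import numpy as np
import weave


class AccuracyMetric(weave.Scorer):
    # Sketch only: checks one guardrail verdict against the row's label.
    @weave.op()
    def score(self, output: dict, label: int) -> dict:
        # output["safe"] is an assumed field on the GuardrailManager result.
        return {"correct": bool(label) == output["safe"]}

    @weave.op()
    def summarize(self, score_rows: list) -> dict:
        # Collapse per-row scores into the flat {metric: value} dict that
        # evaluation_summary["AccuracyMetric"] exposes to the bar chart;
        # only "accuracy" is shown here, the real class may emit more keys.
        valid = [int(row["correct"]) for row in score_rows if "correct" in row]
        return {"accuracy": float(np.mean(valid)) if valid else 0.0}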
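
A related note on the invocation pattern: evaluation.evaluate.call(evaluation, ...) uses Weave's Op.call(), which returns the op's result together with the recorded Call object instead of the result alone; that Call is what supplies call.ui_url for the trace link, and the __weave display_name override is what names the run in the Weave UI. The same pattern in isolation, where demo_op and the project name are illustrative stand-ins:

import asyncio

import weave


@weave.op()
async def demo_op(x: int) -> int:
    return x * 2


weave.init("my-project")  # tracing requires an initialized Weave project
# Op.call() returns (result, Call); the Call carries trace metadata.
result, call = asyncio.run(demo_op.call(7, __weave={"display_name": "demo"}))
print(result, call.ui_url)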