kz209 committed on
Commit
87bb867
·
1 Parent(s): cc6ba40

add batch evaluation

Browse files
Files changed (2) hide show
  1. pages/batch_evaluation.py +36 -2
  2. pages/leaderboard.py +1 -1
pages/batch_evaluation.py CHANGED
@@ -2,6 +2,8 @@ from dotenv import load_dotenv
2
  import gradio as gr
3
  import random
4
 
 
 
5
  from utils.model import Model
6
  from utils.data import dataset
7
  from utils.metric import metric_rouge_score
@@ -10,6 +12,37 @@ from pages.summarization_playground import model, generate_answer
10
 
11
  load_dotenv()
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def process(seed, model_selection, prompt, num=10):
14
  random.seed(seed)
15
  response_list = []
@@ -32,11 +65,12 @@ def process(seed, model_selection, prompt, num=10):
32
  }
33
  )
34
 
35
- return response_list
 
36
 
37
  def create_batch_evaluation_interface():
38
  with gr.Blocks() as demo:
39
- gr.Markdown("## Here are evaluation setups")
40
 
41
  with gr.Row():
42
  seed = gr.Number(value=8, info="pick your favoriate random seed", precision=0)
 
2
  import gradio as gr
3
  import random
4
 
5
+ import markdown
6
+
7
  from utils.model import Model
8
  from utils.data import dataset
9
  from utils.metric import metric_rouge_score
 
12
 
13
  load_dotenv()
14
 
15
+ def display_results(response_list):
16
+ html_output = ""
17
+
18
+ for i, item in enumerate(response_list, 1):
19
+ dialogue = item['dialogue']
20
+ summary = item['summary']
21
+ response = item['response']
22
+ rouge_score = item['metric_score']['rouge_score']
23
+
24
+ html_output += f"""
25
+ <details>
26
+ <summary>Response {i} (Rouge Score: {rouge_score:.2f})</summary>
27
+ <div style="display: flex; justify-content: space-between;">
28
+ <div style="width: 30%;">
29
+ <h3>Dialogue</h3>
30
+ {markdown.markdown(dialogue)}
31
+ </div>
32
+ <div style="width: 30%;">
33
+ <h3>Summary</h3>
34
+ {markdown.markdown(summary)}
35
+ </div>
36
+ <div style="width: 30%;">
37
+ <h3>Response</h3>
38
+ {markdown.markdown(response)}
39
+ </div>
40
+ </div>
41
+ </details>
42
+ """
43
+
44
+ return html_output
45
+
46
  def process(seed, model_selection, prompt, num=10):
47
  random.seed(seed)
48
  response_list = []
 
65
  }
66
  )
67
 
68
+ return display_results(response_list)
69
+
70
 
71
  def create_batch_evaluation_interface():
72
  with gr.Blocks() as demo:
73
+ gr.Markdown("## Here are evaluation setups. It will randomly sample 10 data points to generate and evaluate. Show results once finished.")
74
 
75
  with gr.Row():
76
  seed = gr.Number(value=8, info="pick your favoriate random seed", precision=0)
pages/leaderboard.py CHANGED
@@ -41,7 +41,7 @@ def create_leaderboard():
41
 
42
  sort_by = gr.Dropdown(list(df.columns), label="Sort by", value="Rank")
43
 
44
- stats = gr.Markdown("**Performance**\n\n**methods**: 4, **questions**: 150")
45
 
46
  leaderboard = gr.HTML(update_leaderboard("Rank"), elem_id="leaderboard")
47
 
 
41
 
42
  sort_by = gr.Dropdown(list(df.columns), label="Sort by", value="Rank")
43
 
44
+ gr.Markdown("**Performance**\n\n**methods**: 4, **questions**: 150")
45
 
46
  leaderboard = gr.HTML(update_leaderboard("Rank"), elem_id="leaderboard")
47