Fabrice-TIERCELIN committed on
Commit
508030c
·
verified ·
1 Parent(s): 72e210d

Upload 3 files

Browse files
llava/eval/webpage/index.html ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Who's GPT-4's favorite? Battles between State-of-the-Art Chatbots</title>
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
  <link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons">
  <link rel="stylesheet" href="styles.css">
</head>

<body>
  <!-- Top navigation bar with links to the related project pages. -->
  <nav class="navbar navbar-expand-lg navbar-dark bg-dark">
    <a class="navbar-brand" href="#">🏔️ Vicuna Evaluation Examples</a>
    <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
      <span class="navbar-toggler-icon"></span>
    </button>
    <div class="collapse navbar-collapse" id="navbarNav">
      <ul class="navbar-nav mr-auto">
        <li class="nav-item">
          <a class="nav-link" href="https://chat.lmsys.org/">Demo</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="https://vicuna.lmsys.org">Blog</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="https://github.com/lm-sys/FastChat">Github</a>
        </li>
      </ul>
    </div>
  </nav>

  <div class="container mt-5">
    <h2 class="text-center mb-5">Who's GPT-4's favorite? Battles between State-of-the-Art Chatbots</h2>

    <!-- Selection: the <select> elements are populated by script.js. -->
    <div class="form-row">
      <div class="form-group col-md-2">
        <label for="category-select">Category</label>
        <select class="form-control" id="category-select"></select>
      </div>
      <div class="form-group col-md-8">
        <label for="question-select">Question</label>
        <select class="form-control" id="question-select"></select>
      </div>
      <div class="form-group col-md-2">
        <div class="col-md-2"><label>&nbsp;</label></div>
        <div class="btn-group" role="group" aria-label="Left and Right Controller">
          <!-- Icon-only buttons: aria-label supplies the accessible name, the icon glyph is hidden from AT. -->
          <button type="button" class="form-control btn btn-primary" id="prev-question" aria-label="Previous question"><i class="material-icons" aria-hidden="true">keyboard_arrow_left</i></button>
          <button type="button" class="form-control btn btn-primary" id="next-question" aria-label="Next question"><i class="material-icons" aria-hidden="true">keyboard_arrow_right</i></button>
        </div>
      </div>
    </div>

    <!-- "Battle": score labels and the other model's logo are filled in by script.js. -->
    <div class="row mb-4" style="justify-content: center;">
      <div class="col" style="display: flex; justify-content: center; align-items: center;">
        <label class="adjustable-font-size" id="other-score-label">*/10</label>
      </div>
      <div class="col">
        <div class="vertical-flex-layout">
          <img class="shadow figure-img img-fluid" src="" alt="other logo" width="150" id="other-model-figure">
        </div>
      </div>
      <div class="col">
        <div class="vertical-flex-layout">
          <!-- from: https://fonts.google.com/icons?icon.query=battle&selected=Material+Symbols+Outlined:swords:FILL@0;wght@300;GRAD@0;opsz@48&icon.style=Outlined -->
          <!-- Decorative separator, hence the empty alt. -->
          <img class="figure-img img-fluid" src="figures/swords_FILL0_wght300_GRAD0_opsz48.svg" alt="" width="60" height="60">
        </div>
      </div>
      <div class="col">
        <div class="vertical-flex-layout">
          <img class="shadow figure-img img-fluid" src="figures/vicuna.jpeg" alt="vicuna logo" width="150" id="our-model-figure">
        </div>
      </div>
      <div class="col" style="display: flex; justify-content: center; align-items: center;">
        <label class="adjustable-font-size" id="our-score-label">*/10</label>
      </div>
    </div>

    <!-- Question Card -->
    <div class="card mb-4">
      <div class="card-body" id="selected-question"></div>
    </div>

    <!-- Answer Cards -->
    <div class="row">
      <div class="col-md-6">
        <div class="card mb-4 expandable-card">
          <div class="card-header" style="padding-bottom: 0.2rem" id="other-model-header-bg">
            <div class="row">
              <div class="col-md-5" style="align-items: center; display: flex;">
                <label id="other-model-header">Assistant #1</label>
              </div>
              <div class="col-md-7">
                <select class="form-control" id="model-select" style="height: fit-content; margin-top: -0.3rem;" aria-label="Model to compare against"></select>
              </div>
            </div>
          </div>
          <div class="card-body">
            <div class="card-text-container">
              <div class="card-text" id="other-model-answer"></div>
            </div>
            <!-- A real <button> so the toggle is reachable and operable from the keyboard;
                 w-100 keeps the full-width layout the former <div> had. -->
            <button type="button" class="btn btn-primary expand-btn w-100" style="display:flex;"></button>
          </div>
        </div>
      </div>
      <div class="col-md-6">
        <div class="card mb-4 expandable-card">
          <div class="card-header" id="our-model-header">
            Assistant #2 (Vicuna, our model)
          </div>
          <div class="card-body">
            <div class="card-text-container">
              <div class="card-text" id="our-model-answer"></div>
            </div>
            <button type="button" class="btn btn-primary expand-btn w-100" style="display:flex;"></button>
          </div>
        </div>
      </div>
    </div>

    <!-- Evaluation -->
    <div class="card expandable-card">
      <div class="card-header" style="background-color: #c9c9f2;" id="evaluation-header">GPT-4 Evaluation</div>
      <div class="card-body">
        <div class="card-text-container">
          <div class="card-text" id="evaluation-result"></div>
        </div>
        <button type="button" class="btn btn-primary expand-btn w-100" style="display:flex;"></button>
      </div>
    </div>
  </div>

  <div class="container-fluid bg-light py-2">
    <div class="text-center">
      <small class="text-muted">This website is co-authored with <a href="https://openai.com" target="_blank" rel="noopener noreferrer">GPT-4</a>.</small>
    </div>
  </div>

  <!-- Marked.js -->
  <!-- NOTE(review): the package version in this URL (and in the Popper URL below) was
       mangled by e-mail obfuscation in the scraped source; the pins are a reconstruction,
       confirm them against the upstream file. -->
  <script src="https://cdn.jsdelivr.net/npm/marked@4.3.0/lib/marked.umd.min.js"></script>
  <!-- Bootstrap and Popper.js JavaScript dependencies -->
  <script src="https://code.jquery.com/jquery-3.5.1.slim.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@popperjs/core@2.11.6/dist/umd/popper.min.js"></script>
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js"></script>

  <script src="script.js"></script>
  <script>
    // Fetch the JSON file, then hand its contents to the helpers defined in script.js.
    fetch('data.json')
      .then(response => {
        // fetch() only rejects on network failure; surface HTTP errors explicitly.
        if (!response.ok) {
          throw new Error('Failed to load data.json: HTTP ' + response.status);
        }
        return response.json();
      })
      .then(json_data => {
        // Populate the models and questions.
        populateModels(json_data.models);
        populateQuestions(json_data.questions);
        displayQuestion(currentQuestionIndex);
      }).catch(error => console.error(error));
  </script>
</body>

</html>
llava/eval/webpage/script.js ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Description: Script for the evaluation webpage.

// Id of the question currently displayed; the prev/next handlers treat 1 as the first id.
let currentQuestionIndex = 1;

// Display names keyed by model id.
// Declared explicitly (rather than as implicit globals) so the script also works in strict mode.
const modelNameMapping = {
  "gpt35": "ChatGPT-3.5",
  "gpt4": "GPT-4",
  "alpaca": "Alpaca-13b",
  "vicuna": "Vicuna-13b",
  "llama": "LLaMA-13b",
  "bard": "Bard",
};

// Logo image paths keyed by model id. Not every model in modelNameMapping
// has an entry here (there is none for "gpt4").
const modelFigureMapping = {
  "vicuna": "figures/vicuna.jpeg",
  // Image from: https://commons.wikimedia.org/wiki/File:ChatGPT_logo.svg
  "gpt35": "figures/chatgpt.svg",
  // Image from: https://www.reddit.com/r/logodesign/comments/1128aat/google_ai_bard_logo_design/
  "bard": "figures/bard.jpg",
  // Image from: https://crfm.stanford.edu/2023/03/13/alpaca.html
  "alpaca": "figures/alpaca.png",
  // Image adapted from https://commons.wikimedia.org/wiki/File:Llama_on_Machu_Picchu.jpg
  "llama": "figures/llama.jpg",
};

// Question data keyed by question id; filled by populateQuestions().
const questionMapping = {};
// Lists of question ids keyed by category; filled by populateQuestions().
const categoryMapping = {};
// Total number of questions; set by populateQuestions().
let questionsCount = 0;
33
+
34
+
35
/**
 * Render plain text as HTML through Marked.js.
 *
 * Line breaks are normalized first: every double newline is collapsed to a
 * single one, then every newline is doubled, so each source line becomes its
 * own markdown paragraph.
 *
 * @param {string|null|undefined} text - Raw text; null/undefined is treated as
 *   an empty string instead of throwing (e.g. when a model has no answer).
 * @returns {string} Whatever marked.parse() returns for the normalized text.
 *   NOTE(review): callers assign this to innerHTML — assumes the source data is trusted.
 */
function text2Markdown(text) {
  const raw = text == null ? '' : String(text);
  // Normalize the text for markdown rendering.
  const normalized = raw.trim().replaceAll('\n\n', '\n').replaceAll('\n', '\n\n');
  return marked.parse(normalized);
}
40
+
41
/**
 * Upper-case the first character of a string.
 *
 * Works on whole code points, so a leading character outside the Basic
 * Multilingual Plane (stored as a surrogate pair) is capitalized correctly
 * instead of being split in half.
 *
 * @param {string} str - Text to capitalize.
 * @returns {string} The text with its first character upper-cased; falsy or
 *   empty input is returned unchanged.
 */
function capitalizeFirstChar(str) {
  if (!str || str.length === 0) {
    return str;
  }
  const first = String.fromCodePoint(str.codePointAt(0));
  return first.toUpperCase() + str.slice(first.length);
}
47
+
48
/**
 * Rebuild the question dropdown so it lists every question that shares a
 * category with the given question, then select that question.
 *
 * @param {number|string} question_id - Id (key of questionMapping) of the
 *   question to select.
 */
function updateQuestionSelect(question_id) {
  const select = document.getElementById('question-select');
  // Clear the question select.
  select.innerHTML = '';

  const selected = questionMapping[question_id];
  if (!selected) {
    // Unknown id (e.g. no data loaded): leave the dropdown empty rather than throwing.
    console.warn('updateQuestionSelect: unknown question id', question_id);
    return;
  }

  // Populate the question select with the questions of the same category.
  const idsInCategory = categoryMapping[selected.category] || [];
  idsInCategory.forEach(id => {
    const option = document.createElement('option');
    option.value = id;
    option.textContent = 'Q' + id.toString() + ': ' + questionMapping[id].question;
    select.appendChild(option);
  });
  select.value = question_id;
}
63
+
64
/**
 * Show the logo of the model currently chosen in the model dropdown.
 *
 * Models that have no entry in modelFigureMapping get the image source
 * removed, instead of the literal string "undefined" being requested as a URL.
 */
function updateModelSelect() {
  const select = document.getElementById('model-select');
  const figure = document.getElementById('other-model-figure');
  const imgPath = modelFigureMapping[select.value];
  if (imgPath) {
    figure.src = imgPath;
  } else {
    figure.removeAttribute('src');
  }
}
69
+
70
/**
 * Fill the model dropdown with one option per model id and refresh the logo.
 *
 * @param {string[]} models - Model ids. The option label is the display name
 *   from modelNameMapping, falling back to the raw id when no name is
 *   registered so the option is never blank.
 */
function populateModels(models) {
  const select = document.getElementById('model-select');
  (models || []).forEach(model => {
    const option = document.createElement('option');
    option.value = model;
    option.textContent = modelNameMapping[model] || model;
    select.appendChild(option);
  });
  updateModelSelect();
}
80
+
81
/**
 * Index the questions by id and by category, fill the category dropdown, and
 * build the question dropdown for the current question.
 *
 * @param {Object[]} questions - Question records; each is read for `id`,
 *   `category`, `question`, `answers`, `evaluations` and `scores`.
 */
function populateQuestions(questions) {
  const category_select = document.getElementById('category-select');

  questionsCount = questions.length;
  questions.forEach(question => {
    // Store the question data in a mapping for later use.
    questionMapping[question.id] = {
      category: question.category,
      question: question.question,
      answers: question.answers,
      evaluations: question.evaluations,
      scores: question.scores,
    };
    // Store the question id in the category mapping. An own-property check is
    // used so a category named like an Object.prototype member is not mistaken
    // for an existing entry.
    if (Object.prototype.hasOwnProperty.call(categoryMapping, question.category)) {
      categoryMapping[question.category].push(question.id);
    } else {
      // First question of this category: register it and add a dropdown entry.
      categoryMapping[question.category] = [question.id];
      const category_option = document.createElement('option');
      category_option.value = question.category;
      category_option.textContent = capitalizeFirstChar(question.category);
      category_select.appendChild(category_option);
    }
  });
  // Set the default category.
  updateQuestionSelect(currentQuestionIndex);
}
109
+
110
/**
 * Render the text of the given question into the question card and then
 * refresh the answer/evaluation cards for it.
 *
 * @param {number|string} index - Key of the question in questionMapping.
 */
function displayQuestion(index) {
  const entry = questionMapping[index];
  const questionCard = document.getElementById('selected-question');
  const rendered = text2Markdown('**Question:** ' + entry.question);
  questionCard.innerHTML = rendered;
  displayAnswers(index);
}
115
+
116
/**
 * Render both answers, the evaluation text, the scores and the winner
 * highlighting for a question, using the model currently chosen in the model
 * dropdown as "Assistant #1" and Vicuna as "Assistant #2".
 *
 * @param {number|string} index - Key of the question in questionMapping.
 */
function displayAnswers(index) {
  const question = questionMapping[index];
  const otherModel = document.getElementById('model-select').value;

  // Render the answers with markdown.
  document.getElementById('other-model-answer').innerHTML = text2Markdown(question.answers[otherModel]);
  document.getElementById('our-model-answer').innerHTML = text2Markdown(question.answers.vicuna);

  // Display evaluation. score[0] is shown for the selected model, score[1] for Vicuna.
  // Both are locals now; they used to leak as implicit globals.
  const score = question.scores[otherModel];
  const score_text = modelNameMapping[otherModel] + " " + score[0] + "/10, Vicuna-13b " + score[1] + "/10";
  document.getElementById('evaluation-header').textContent = "GPT-4 Evaluation" + " (Score: " + score_text + ")";
  document.getElementById('evaluation-result').innerHTML = text2Markdown(question.evaluations[otherModel]);

  // Update model names.
  let assistant1_title = "Assistant #1";
  let assistant2_title = "Assistant #2 (Vicuna-13b, our model)";
  // Update scores/labels.
  let assistant1_score_label = score[0].toString() + '/10';
  let assistant2_score_label = score[1].toString() + '/10';

  const colorRed = '#fa9';
  const colorBlue = '#8ef';
  const colorYellow = '#fe7';
  let otherModelHeaderColor = '';
  let ourModelHeaderColor = '';
  // Update the winner: a tie marks both sides (yellow); otherwise the winner
  // gets the trophy and the blue header, the loser the red header.
  if (score[0] == score[1]) {
    assistant1_title = '🏆 ' + assistant1_title;
    assistant1_score_label = '🏆 ' + assistant1_score_label;
    assistant2_title = '🏆 ' + assistant2_title;
    assistant2_score_label = '🏆 ' + assistant2_score_label;
    otherModelHeaderColor = colorYellow;
    ourModelHeaderColor = colorYellow;
  } else if (score[0] > score[1]) {
    assistant1_title = '🏆 ' + assistant1_title;
    assistant1_score_label = '🏆 ' + assistant1_score_label;
    otherModelHeaderColor = colorBlue;
    ourModelHeaderColor = colorRed;
  } else if (score[0] < score[1]) {
    assistant2_title = '🏆 ' + assistant2_title;
    assistant2_score_label = '🏆 ' + assistant2_score_label;
    otherModelHeaderColor = colorRed;
    ourModelHeaderColor = colorBlue;
  }

  document.getElementById('other-model-header-bg').style.backgroundColor = otherModelHeaderColor;
  document.getElementById('our-model-header').style.backgroundColor = ourModelHeaderColor;

  document.getElementById('other-model-header').textContent = assistant1_title;
  document.getElementById('our-model-header').textContent = assistant2_title;

  document.getElementById('other-score-label').textContent = assistant1_score_label;
  document.getElementById('our-score-label').textContent = assistant2_score_label;

  // Reset the expanded state and update expand buttons visibility for all
  // expandable cards after displaying answers.
  document.querySelectorAll('.expandable-card').forEach(card => {
    card.classList.remove('expanded');
    updateExpandButtonVisibility(card);
    const expandBtn = card.querySelector('.expand-btn');
    expandBtn.innerHTML = '<i class="material-icons" style="pointer-events: none">keyboard_arrow_down</i> Show more';
  });
}
180
+
181
// Show the question picked in the question dropdown.
document.getElementById('question-select').addEventListener('change', e => {
  // Option values are question ids; parse them explicitly as base-10.
  currentQuestionIndex = parseInt(e.target.value, 10);
  displayQuestion(currentQuestionIndex);
});
185
+
186
// When another category is picked, jump to the first question registered for
// it, rebuild the question dropdown and render that question.
document.getElementById('category-select').addEventListener('change', function (event) {
  const chosenCategory = event.target.value;
  const idsForCategory = categoryMapping[chosenCategory];
  currentQuestionIndex = idsForCategory[0];
  updateQuestionSelect(currentQuestionIndex);
  displayQuestion(currentQuestionIndex);
});
193
+
194
// Re-render the comparison whenever the model is changed.
document.getElementById('model-select').addEventListener('change', () => {
  // displayAnswers() already resets every expandable card and re-evaluates its
  // expand button, so no separate pass over the cards is needed here.
  displayAnswers(currentQuestionIndex);
  updateModelSelect();
});
202
+
203
/**
 * Synchronize the dropdowns with currentQuestionIndex and render that
 * question. If the question belongs to a different category than the one
 * selected, the category dropdown is switched and the question dropdown is
 * rebuilt for the new category.
 */
function switchQuestionAndCategory() {
  const target = questionMapping[currentQuestionIndex];
  if (!target) {
    // No such question (e.g. data not loaded yet): nothing to switch to.
    console.warn('switchQuestionAndCategory: unknown question id', currentQuestionIndex);
    return;
  }

  document.getElementById('question-select').value = currentQuestionIndex;
  const categorySelect = document.getElementById('category-select');
  const oldCategory = categorySelect.value;
  const newCategory = target.category;
  if (oldCategory != newCategory) {
    categorySelect.value = newCategory;
    updateQuestionSelect(currentQuestionIndex);
  }
  displayQuestion(currentQuestionIndex);
}
213
+
214
// Step back one question, never going below the first id (1).
document.getElementById('prev-question').addEventListener('click', function () {
  const previousIndex = currentQuestionIndex - 1;
  currentQuestionIndex = Math.max(1, previousIndex);
  switchQuestionAndCategory();
});
219
+
220
// Step forward one question, never going past questionsCount.
document.getElementById('next-question').addEventListener('click', function () {
  const followingIndex = currentQuestionIndex + 1;
  currentQuestionIndex = Math.min(questionsCount, followingIndex);
  switchQuestionAndCategory();
});
225
+
226
/**
 * Show a card's expand button only when its text container is taller than the
 * space it is given. Cards whose content already fits get the button hidden
 * and are marked as expanded.
 *
 * @param {Element} card - An `.expandable-card` element containing a
 *   `.card-text-container` and an `.expand-btn`.
 */
function updateExpandButtonVisibility(card) {
  const textBox = card.querySelector('.card-text-container');
  const toggle = card.querySelector('.expand-btn');
  const isClipped = textBox.scrollHeight > textBox.offsetHeight;
  if (!isClipped) {
    toggle.style.display = 'none';
    card.classList.add('expanded');
    return;
  }
  toggle.style.display = 'flex';
}
236
+
237
// Toggle a card between its collapsed and expanded state and relabel the button.
document.querySelectorAll('.expand-btn').forEach(btn => {
  btn.addEventListener('click', () => {
    // Work on the button the listener is bound to rather than the event target,
    // so a click that lands on a child element can never relabel the wrong node.
    const card = btn.closest('.expandable-card');
    card.classList.toggle('expanded');
    const more = '<i class="material-icons" style="pointer-events: none">keyboard_arrow_down</i> Show more';
    const less = '<i class="material-icons" style="pointer-events: none">keyboard_arrow_up</i> Show less';
    btn.innerHTML = card.classList.contains('expanded') ? less : more;
  });
});
llava/eval/webpage/styles.css ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Page-wide typography and background. */
body {
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
  background-color: #f8f9fa;
}

/* Navigation links in the dark navbar. */
.navbar-dark .navbar-nav .nav-link {
  color: #f1cf68;
  font-size: 1.1rem;
  padding: 0.5rem 0.6rem;
}

.card-header {
  font-weight: bold;
}

/* Cards get a soft shadow that deepens on hover. */
.card {
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
  transition: 0.3s;
}

.card:hover {
  box-shadow: 0 8px 16px rgba(0, 0, 0, 0.2);
}

button {
  transition: background-color 0.3s;
}

button:hover {
  background-color: #007bff;
}

@media (max-width: 767px) {
  .form-row .form-group {
    margin-bottom: 10px;
  }
}

/* Extra styles */

/* Collapsed cards clip their text to 200px; the "expanded" class lifts the limit. */
.expandable-card .card-text-container {
  max-height: 200px;
  overflow-y: hidden;
  position: relative;
}

.expandable-card.expanded .card-text-container {
  max-height: none;
}

.expand-btn {
  position: relative;
  display: none;
  background-color: rgba(255, 255, 255, 0.8);
  color: #510c75;
  border-color: transparent;
}

.expand-btn:hover {
  background-color: rgba(200, 200, 200, 0.8);
  text-decoration: none;
  border-color: transparent;
  color: #510c75;
}

.expand-btn:focus {
  outline: none;
  text-decoration: none;
}

/* Keep a visible focus indicator for keyboard users; the rule above removes
   the default outline for every kind of focus. */
.expand-btn:focus-visible {
  outline: 2px solid #510c75;
  outline-offset: 2px;
}

/* Fade-out overlay at the bottom of a collapsed card's text. */
.expandable-card:not(.expanded) .card-text-container:after {
  content: "";
  position: absolute;
  bottom: 0;
  left: 0;
  width: 100%;
  height: 90px;
  background: linear-gradient(rgba(255, 255, 255, 0.2), rgba(255, 255, 255, 1));
}

/* Pull the button up so it overlaps the fade-out overlay. */
.expandable-card:not(.expanded) .expand-btn {
  margin-top: -40px;
}

.card-body {
  padding-bottom: 5px;
}

/* Centers its children in a vertical stack. */
.vertical-flex-layout {
  justify-content: center;
  align-items: center;
  height: 100%;
  display: flex;
  flex-direction: column;
  gap: 5px;
}

.figure-img {
  max-width: 100%;
  height: auto;
}

/* Score labels scale with the viewport width. */
.adjustable-font-size {
  font-size: calc(0.5rem + 2vw);
}