panuthept committed
Commit 767e8cd · 1 Parent(s): fea3156

remove tag

Files changed (1)
  1. app.py +5 -36
app.py CHANGED
@@ -22,17 +22,14 @@ The evaluation is conducted on 8 datasets across 4 tasks:
  - TyDiQA (Thai only), contains 763 test samples, https://huggingface.co/datasets/chompk/tydiqa-goldp-th
  ## Metrics
  The evaluation metrics for each task are as follows:
- 1. STS -> Spearman correlation
- 2. Text Classification -> F1
- 3. Pair Classification -> Average Precision
- 3. Retrieval -> MMR@10
- ## Tagging
- 🟢 Open sourced 📦 API
+ 1. STS: Spearman’s rank correlation
+ 2. Text Classification: F1
+ 3. Pair Classification: Average Precision
+ 3. Retrieval: MMR@10
  """
 
  results = [
  {
- 'Type': '🟢',
  'Model Name': '[XLMR-base](https://huggingface.co/FacebookAI/xlm-roberta-base)',
  'Model Size (Million Parameters)': 279,
  'Embedding Dimensions': 768,
@@ -43,7 +40,6 @@ results = [
  'Retrieval (3 datasets)': 5.57,
  },
  {
- 'Type': '🟢',
  'Model Name': '[XLMR-large](https://huggingface.co/FacebookAI/xlm-roberta-large)',
  'Model Size (Million Parameters)': 561,
  'Embedding Dimensions': 1024,
@@ -54,7 +50,6 @@ results = [
  'Retrieval (3 datasets)': 11.80,
  },
  {
- 'Type': '🟢',
  'Model Name': '[WangchanBERTa](https://huggingface.co/airesearch/wangchanberta-base-att-spm-uncased)',
  'Model Size (Million Parameters)': 106,
  'Embedding Dimensions': 768,
@@ -65,7 +60,6 @@ results = [
  'Retrieval (3 datasets)': 19.49,
  },
  {
- 'Type': '🟢',
  'Model Name': '[PhayaThaiBERT](https://huggingface.co/clicknext/phayathaibert)',
  'Model Size (Million Parameters)': 278,
  'Embedding Dimensions': 768,
@@ -76,7 +70,6 @@ results = [
  'Retrieval (3 datasets)': 56.31,
  },
  {
- 'Type': '🟢',
  'Model Name': '[MPNet-multilingual](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2)',
  'Model Size (Million Parameters)': 278,
  'Embedding Dimensions': 768,
@@ -87,7 +80,6 @@ results = [
  'Retrieval (3 datasets)': 64.13,
  },
  {
- 'Type': '🟢',
  'Model Name': '[DistilUSE-multilingual](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2)',
  'Model Size (Million Parameters)': 135,
  'Embedding Dimensions': 512,
@@ -98,7 +90,6 @@ results = [
  'Retrieval (3 datasets)': 42.72,
  },
  {
- 'Type': '🟢',
  'Model Name': '[BGE-M3](https://huggingface.co/BAAI/bge-m3)',
  'Model Size (Million Parameters)': 570,
  'Embedding Dimensions': 1024,
@@ -109,7 +100,6 @@ results = [
  'Retrieval (3 datasets)': 91.42,
  },
  {
- 'Type': '🟢',
  'Model Name': '[SimCSE-XLMR-base](https://huggingface.co/kornwtp/simcse-model-XLMR)',
  'Model Size (Million Parameters)': 279,
  'Embedding Dimensions': 768,
@@ -120,7 +110,6 @@ results = [
  'Retrieval (3 datasets)': 54.17,
  },
  {
- 'Type': '🟢',
  'Model Name': '[SimCSE-WangchanBERTa](https://huggingface.co/kornwtp/simcse-model-wangchanberta)',
  'Model Size (Million Parameters)': 106,
  'Embedding Dimensions': 768,
@@ -131,7 +120,6 @@ results = [
  'Retrieval (3 datasets)': 51.05,
  },
  {
- 'Type': '🟢',
  'Model Name': '[SimCSE-PhayaThaiBERT](https://huggingface.co/kornwtp/simcse-model-phayathaibert)',
  'Model Size (Million Parameters)': 278,
  'Embedding Dimensions': 768,
@@ -142,7 +130,6 @@ results = [
  'Retrieval (3 datasets)': 66.05,
  },
  {
- 'Type': '🟢',
  'Model Name': '[SCT-XLMR-base](https://huggingface.co/kornwtp/SCT-model-XLMR)',
  'Model Size (Million Parameters)': 279,
  'Embedding Dimensions': 768,
@@ -153,7 +140,6 @@ results = [
  'Retrieval (3 datasets)': 54.90,
  },
  {
- 'Type': '🟢',
  'Model Name': '[SCT-WangchanBERTa](https://huggingface.co/kornwtp/SCT-model-wangchanberta)',
  'Model Size (Million Parameters)': 106,
  'Embedding Dimensions': 768,
@@ -164,7 +150,6 @@ results = [
  'Retrieval (3 datasets)': 63.83,
  },
  {
- 'Type': '🟢',
  'Model Name': '[SCT-PhayaThaiBERT](https://huggingface.co/kornwtp/SCT-model-phayathaibert)',
  'Model Size (Million Parameters)': 278,
  'Embedding Dimensions': 768,
@@ -175,7 +160,6 @@ results = [
  'Retrieval (3 datasets)': 66.20,
  },
  {
- 'Type': '🟢',
  'Model Name': '[SCT-KD-XLMR-base](https://huggingface.co/kornwtp/SCT-KD-model-XLMR)',
  'Model Size (Million Parameters)': 279,
  'Embedding Dimensions': 768,
@@ -186,7 +170,6 @@ results = [
  'Retrieval (3 datasets)': 65.02,
  },
  {
- 'Type': '🟢',
  'Model Name': '[SCT-KD-WangchanBERTa](https://huggingface.co/kornwtp/SCT-KD-model-wangchanberta)',
  'Model Size (Million Parameters)': 106,
  'Embedding Dimensions': 768,
@@ -197,7 +180,6 @@ results = [
  'Retrieval (3 datasets)': 62.38,
  },
  {
- 'Type': '🟢',
  'Model Name': '[SCT-KD-PhayaThaiBERT](https://huggingface.co/kornwtp/SCT-KD-model-phayathaibert)',
  'Model Size (Million Parameters)': 278,
  'Embedding Dimensions': 768,
@@ -208,7 +190,6 @@ results = [
  'Retrieval (3 datasets)': 67.94,
  },
  {
- 'Type': '🟢',
  'Model Name': '[ConGen-XLMR-base](https://huggingface.co/kornwtp/ConGen-model-XLMR)',
  'Model Size (Million Parameters)': 279,
  'Embedding Dimensions': 768,
@@ -219,7 +200,6 @@ results = [
  'Retrieval (3 datasets)': 68.03,
  },
  {
- 'Type': '🟢',
  'Model Name': '[ConGen-WangchanBERTa](https://huggingface.co/kornwtp/ConGen-model-wangchanberta)',
  'Model Size (Million Parameters)': 106,
  'Embedding Dimensions': 768,
@@ -230,7 +210,6 @@ results = [
  'Retrieval (3 datasets)': 67.66,
  },
  {
- 'Type': '🟢',
  'Model Name': '[ConGen-PhayaThaiBERT](https://huggingface.co/kornwtp/ConGen-model-phayathaibert)',
  'Model Size (Million Parameters)': 278,
  'Embedding Dimensions': 768,
@@ -241,7 +220,6 @@ results = [
  'Retrieval (3 datasets)': 68.04,
  },
  {
- 'Type': '🟢',
  'Model Name': '[E5-Mistral-7B-Instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct)',
  'Model Size (Million Parameters)': 7110,
  'Embedding Dimensions': 4096,
@@ -252,7 +230,6 @@ results = [
  'Retrieval (3 datasets)': 86.80,
  },
  {
- 'Type': '🟢',
  'Model Name': '[gte-Qwen2-7B-Instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)',
  'Model Size (Million Parameters)': 7610,
  'Embedding Dimensions': 3584,
@@ -263,7 +240,6 @@ results = [
  'Retrieval (3 datasets)': 38.31,
  },
  {
- 'Type': '🟢',
  'Model Name': '[GritLM-7B](https://huggingface.co/GritLM/GritLM-7B)',
  'Model Size (Million Parameters)': 7240,
  'Embedding Dimensions': 4096,
@@ -274,7 +250,7 @@ results = [
  'Retrieval (3 datasets)': 22.79,
  },
  {
- 'Type': '🟢',
+
  'Model Name': '[Llama3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B)',
  'Model Size (Million Parameters)': 8030,
  'Embedding Dimensions': 4096,
@@ -285,7 +261,6 @@ results = [
  'Retrieval (3 datasets)': 47.93,
  },
  {
- 'Type': '🟢',
  'Model Name': '[Llama3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)',
  'Model Size (Million Parameters)': 8030,
  'Embedding Dimensions': 4096,
@@ -296,7 +271,6 @@ results = [
  'Retrieval (3 datasets)': 50.38,
  },
  {
- 'Type': '🟢',
  'Model Name': '[Llama3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B)',
  'Model Size (Million Parameters)': 8030,
  'Embedding Dimensions': 4096,
@@ -307,7 +281,6 @@ results = [
  'Retrieval (3 datasets)': 43.64,
  },
  {
- 'Type': '🟢',
  'Model Name': '[Llama3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)',
  'Model Size (Million Parameters)': 8030,
  'Embedding Dimensions': 4096,
@@ -318,7 +291,6 @@ results = [
  'Retrieval (3 datasets)': 43.63,
  },
  {
- 'Type': '🟢',
  'Model Name': '[Typhoon-8B-Instruct](https://huggingface.co/scb10x/llama-3-typhoon-v1.5-8b-instruct)',
  'Model Size (Million Parameters)': 8030,
  'Embedding Dimensions': 4096,
@@ -329,7 +301,6 @@ results = [
  'Retrieval (3 datasets)': 52.65,
  },
  {
- 'Type': '📦',
  'Model Name': 'Cohere-embed-multilingual-v2.0',
  'Model Size (Million Parameters)': "N/A",
  'Embedding Dimensions': 768,
@@ -340,7 +311,6 @@ results = [
  'Retrieval (3 datasets)': 85.23,
  },
  {
- 'Type': '📦',
  'Model Name': 'Cohere-embed-multilingual-v3.0',
  'Model Size (Million Parameters)': "N/A",
  'Embedding Dimensions': 1024,
@@ -351,7 +321,6 @@ results = [
  'Retrieval (3 datasets)': 91.43,
  },
  {
- 'Type': '📦',
  'Model Name': 'Openai-text-embedding-3-large',
  'Model Size (Million Parameters)': "N/A",
  'Embedding Dimensions': 3072,
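
The metrics listed in the updated docstring map to standard implementations. The sketch below is not the leaderboard's actual evaluation code (which is not part of this diff); it assumes "MMR@10" refers to Mean Reciprocal Rank at 10 and that F1 is macro-averaged, and it only illustrates how the four task metrics are conventionally computed.

# Minimal sketch of the four metrics named in the docstring (assumptions noted above).
from scipy.stats import spearmanr
from sklearn.metrics import average_precision_score, f1_score


def sts_score(gold_scores, predicted_similarities):
    # STS: Spearman rank correlation between gold scores and model similarities.
    return spearmanr(gold_scores, predicted_similarities).correlation


def classification_score(y_true, y_pred):
    # Text classification: F1 (macro-averaged here; the leaderboard may average differently).
    return f1_score(y_true, y_pred, average="macro")


def pair_classification_score(y_true, pair_similarities):
    # Pair classification: Average Precision over pair similarity scores.
    return average_precision_score(y_true, pair_similarities)


def mrr_at_10(ranked_doc_ids, relevant_doc_id):
    # Retrieval: reciprocal rank of the first relevant document within the top 10, else 0.
    for rank, doc_id in enumerate(ranked_doc_ids[:10], start=1):
        if doc_id == relevant_doc_id:
            return 1.0 / rank
    return 0.0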
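
For context, app.py is the entry point of the Space, and the `results` list of dicts above is the leaderboard's underlying table; with the 'Type' column removed by this commit, each entry carries only the model name, size, embedding dimensions, and per-task scores. The snippet below is a hypothetical rendering sketch, assuming a Gradio app (the rest of app.py is not shown in this diff); it uses a single illustrative row in the same shape as the entries above.

import gradio as gr
import pandas as pd

# One illustrative row in the same shape as the `results` entries in app.py.
results = [
    {
        'Model Name': '[BGE-M3](https://huggingface.co/BAAI/bge-m3)',
        'Model Size (Million Parameters)': 570,
        'Embedding Dimensions': 1024,
        'Retrieval (3 datasets)': 91.42,
    },
]

df = pd.DataFrame(results)

with gr.Blocks() as demo:
    gr.Markdown("## Thai Sentence Embedding Benchmark")
    # Shown as a plain, read-only table; rendering the markdown model links
    # would additionally require a markdown column datatype.
    gr.Dataframe(value=df, interactive=False)

demo.launch()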