Spaces:
Sleeping
Sleeping
arunasrivastava
committed on
Commit
·
03346c0
1
Parent(s):
7d25ccc
updating descriptions
Browse files- __pycache__/main.cpython-310.pyc +0 -0
- app.py +6 -5
- main.py +1 -1
- queue/leaderboard.json +30 -0
- queue/results.json +138 -0
- queue/tasks.json +36 -0
__pycache__/main.cpython-310.pyc
CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
|
|
app.py
CHANGED
@@ -135,7 +135,7 @@ with gr.Blocks(css="""
|
|
135 |
box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
|
136 |
}
|
137 |
.styled-table thead tr {
|
138 |
-
background-color: #
|
139 |
color: #ffffff;
|
140 |
text-align: left;
|
141 |
}
|
@@ -157,22 +157,23 @@ with gr.Blocks(css="""
|
|
157 |
## Test Set Information
|
158 |
The test set used for evaluation is from the [TIMIT speech corpus](https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech). The TIMIT corpus is a widely used dataset for speech recognition research.
|
159 |
|
160 |
-
##
|
161 |
-
|
162 |
""")
|
163 |
with gr.Tabs():
|
164 |
with gr.TabItem("🏆 Leaderboard"):
|
165 |
leaderboard_html = gr.HTML(create_html_table(format_leaderboard_df(load_leaderboard_data())))
|
166 |
refresh_btn = gr.Button("🔄 Refresh")
|
167 |
refresh_btn.click(
|
168 |
-
lambda:
|
169 |
outputs=leaderboard_html
|
170 |
)
|
171 |
|
|
|
172 |
with gr.TabItem("📝 Submit Model"):
|
173 |
model_name = gr.Textbox(label="Model Name", placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft")
|
174 |
submission_name = gr.Textbox(label="Submission Name", placeholder="My Model v1.0")
|
175 |
-
github_url = gr.Textbox(label="
|
176 |
submit_btn = gr.Button("Submit")
|
177 |
result = gr.Textbox(label="Submission Status")
|
178 |
|
|
|
135 |
box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
|
136 |
}
|
137 |
.styled-table thead tr {
|
138 |
+
background-color: #004999;
|
139 |
color: #ffffff;
|
140 |
text-align: left;
|
141 |
}
|
|
|
157 |
## Test Set Information
|
158 |
The test set used for evaluation is from the [TIMIT speech corpus](https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech). The TIMIT corpus is a widely used dataset for speech recognition research.
|
159 |
|
160 |
+
## Compute
|
161 |
+
This leaderboard uses the free basic plan (16GB RAM, 2vCPUs). The evaluation may take several hours to complete. Please be patient and do not submit the same model multiple times.
|
162 |
""")
|
163 |
with gr.Tabs():
|
164 |
with gr.TabItem("🏆 Leaderboard"):
|
165 |
leaderboard_html = gr.HTML(create_html_table(format_leaderboard_df(load_leaderboard_data())))
|
166 |
refresh_btn = gr.Button("🔄 Refresh")
|
167 |
refresh_btn.click(
|
168 |
+
lambda: create_html_table(format_leaderboard_df(load_leaderboard_data())),
|
169 |
outputs=leaderboard_html
|
170 |
)
|
171 |
|
172 |
+
|
173 |
with gr.TabItem("📝 Submit Model"):
|
174 |
model_name = gr.Textbox(label="Model Name", placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft")
|
175 |
submission_name = gr.Textbox(label="Submission Name", placeholder="My Model v1.0")
|
176 |
+
github_url = gr.Textbox(label="Github/Kaggle/HF URL (optional)", placeholder="https://github.com/username/repo")
|
177 |
submit_btn = gr.Button("Submit")
|
178 |
result = gr.Textbox(label="Submission Status")
|
179 |
|
main.py
CHANGED
@@ -488,7 +488,7 @@ async def get_leaderboard():
|
|
488 |
"""Get current leaderboard"""
|
489 |
try:
|
490 |
leaderboard = storage_manager.load('leaderboard')
|
491 |
-
sorted_leaderboard = sorted(leaderboard, key=lambda x: (x["
|
492 |
return sorted_leaderboard
|
493 |
except Exception as e:
|
494 |
print(f"Error loading leaderboard: {e}")
|
|
|
488 |
"""Get current leaderboard"""
|
489 |
try:
|
490 |
leaderboard = storage_manager.load('leaderboard')
|
491 |
+
sorted_leaderboard = sorted(leaderboard, key=lambda x: (x["average_pwed"], x["average_per"]))
|
492 |
return sorted_leaderboard
|
493 |
except Exception as e:
|
494 |
print(f"Error loading leaderboard: {e}")
|
queue/leaderboard.json
CHANGED
@@ -68,5 +68,35 @@
|
|
68 |
"subset": "test",
|
69 |
"github_url": "https://github.com/KoelLabs/ML/",
|
70 |
"submission_date": "2024-12-12T16:07:25.391145"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
}
|
72 |
]
|
|
|
68 |
"subset": "test",
|
69 |
"github_url": "https://github.com/KoelLabs/ML/",
|
70 |
"submission_date": "2024-12-12T16:07:25.391145"
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"submission_id": "02f223d4-7b98-4613-9377-19b74defe308",
|
74 |
+
"submission_name": "wav2vec2 ipa eng ",
|
75 |
+
"model": "snu-nia-12/wav2vec2-large_nia12_phone-ipa_english",
|
76 |
+
"average_per": 0.4847029843149011,
|
77 |
+
"average_pwed": 0.2072006544586948,
|
78 |
+
"subset": "test",
|
79 |
+
"github_url": null,
|
80 |
+
"submission_date": "2024-12-18T22:01:20.855881"
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"submission_id": "bed08468-42c7-459f-a46d-49ead50abfbc",
|
84 |
+
"submission_name": "fine-tuned version of facebook/wav2vec2-xls-r-300m on the Timit dataset",
|
85 |
+
"model": "vitouphy/wav2vec2-xls-r-300m-timit-phoneme",
|
86 |
+
"average_per": 0.2561961414705681,
|
87 |
+
"average_pwed": 0.1378394393452702,
|
88 |
+
"subset": "test",
|
89 |
+
"github_url": "https://www.kaggle.com/code/vitouphy/phoneme-recognition-with-wav2vec2",
|
90 |
+
"submission_date": "2024-12-18T22:50:59.627338"
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"submission_id": "4086072e-9368-442f-97cd-1fda6bf6656e",
|
94 |
+
"submission_name": "wav2vec2 model",
|
95 |
+
"model": "ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa-plus-2000",
|
96 |
+
"average_per": 0.6479484324708775,
|
97 |
+
"average_pwed": 0.18710002665151734,
|
98 |
+
"subset": "test",
|
99 |
+
"github_url": "https://huggingface.co/ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa1000-ns",
|
100 |
+
"submission_date": "2024-12-18T23:29:27.322286"
|
101 |
}
|
102 |
]
|
queue/results.json
CHANGED
@@ -366,5 +366,143 @@
|
|
366 |
}
|
367 |
],
|
368 |
"timestamp": "2024-12-12T16:07:25.389475"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
}
|
370 |
]
|
|
|
366 |
}
|
367 |
],
|
368 |
"timestamp": "2024-12-12T16:07:25.389475"
|
369 |
+
},
|
370 |
+
{
|
371 |
+
"task_id": "2e592612-ca38-4afb-a6a0-3c870b288960",
|
372 |
+
"model": "snu-nia-12/wav2vec2-large_nia12_phone-ipa_english",
|
373 |
+
"subset": "test",
|
374 |
+
"num_files": 1680,
|
375 |
+
"average_per": 0.4847029843149011,
|
376 |
+
"average_pwed": 0.2072006544586948,
|
377 |
+
"detailed_results": [
|
378 |
+
{
|
379 |
+
"file": "data/TEST/DR1/FAKS0/SA1.WAV",
|
380 |
+
"ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
|
381 |
+
"prediction": "ʃihædjʊrdɑrksutɪngrisiwɑʃwɔtərɔljɪrər",
|
382 |
+
"per": 0.42424242424242425,
|
383 |
+
"pwed": 0.15393518518518517
|
384 |
+
},
|
385 |
+
{
|
386 |
+
"file": "data/TEST/DR1/FAKS0/SA2.WAV",
|
387 |
+
"ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
|
388 |
+
"prediction": "doʊntæskmitɪkɛriənɔɪliræglaɪkðətdoʊndt",
|
389 |
+
"per": 0.5,
|
390 |
+
"pwed": 0.2623873873873874
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"file": "data/TEST/DR1/FAKS0/SI1573.WAV",
|
394 |
+
"ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
|
395 |
+
"prediction": "hɪzkæptənwɑzθɪnəndhægərdəndhɪzbjutəfəlbutswərwɔrnəndʃæbiiii",
|
396 |
+
"per": 0.46808510638297873,
|
397 |
+
"pwed": 0.2191091954022989
|
398 |
+
},
|
399 |
+
{
|
400 |
+
"file": "data/TEST/DR1/FAKS0/SI2203.WAV",
|
401 |
+
"ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
|
402 |
+
"prediction": "ðərizənzfərðɪstaɪvsimdfulɪʃnaʊ",
|
403 |
+
"per": 0.20689655172413793,
|
404 |
+
"pwed": 0.054166666666666675
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"file": "data/TEST/DR1/FAKS0/SI943.WAV",
|
408 |
+
"ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
|
409 |
+
"prediction": "prədəkʃənmeɪfɔlfɑrbɪloʊɛkspɛkteɪʃənzpzppppzpdtdtd",
|
410 |
+
"per": 0.7272727272727273,
|
411 |
+
"pwed": 0.34438775510204084
|
412 |
+
}
|
413 |
+
],
|
414 |
+
"timestamp": "2024-12-18T22:01:20.853274"
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"task_id": "d38e65ce-75b5-4dbf-8ade-bff6a5803790",
|
418 |
+
"model": "vitouphy/wav2vec2-xls-r-300m-timit-phoneme",
|
419 |
+
"subset": "test",
|
420 |
+
"num_files": 1680,
|
421 |
+
"average_per": 0.2561961414705681,
|
422 |
+
"average_pwed": 0.1378394393452702,
|
423 |
+
"detailed_results": [
|
424 |
+
{
|
425 |
+
"file": "data/TEST/DR1/FAKS0/SA1.WAV",
|
426 |
+
"ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
|
427 |
+
"prediction": "ʃihædjɝdɑɹksuɾɪngɹisiwɑʃwɑɾɝɑljiɝ",
|
428 |
+
"per": 0.18181818181818182,
|
429 |
+
"pwed": 0.13257575757575757
|
430 |
+
},
|
431 |
+
{
|
432 |
+
"file": "data/TEST/DR1/FAKS0/SA2.WAV",
|
433 |
+
"ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
|
434 |
+
"prediction": "doʊnæskmitɪkæɹiɪnɔɪliɹæglaɪkðæ",
|
435 |
+
"per": 0.21428571428571427,
|
436 |
+
"pwed": 0.10919540229885057
|
437 |
+
},
|
438 |
+
{
|
439 |
+
"file": "data/TEST/DR1/FAKS0/SI1573.WAV",
|
440 |
+
"ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
|
441 |
+
"prediction": "hɪzkætɪnwəsθɪnənhægɝdɪnɪzbjuɾɪflbutswɝwɑɹnɪnʃæbi",
|
442 |
+
"per": 0.19148936170212766,
|
443 |
+
"pwed": 0.0576241134751773
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"file": "data/TEST/DR1/FAKS0/SI2203.WAV",
|
447 |
+
"ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
|
448 |
+
"prediction": "ðɪɹizənzfɝðɪsdaɪvsimdfulɪʃnaʊ",
|
449 |
+
"per": 0.10344827586206896,
|
450 |
+
"pwed": 0.03735632183908046
|
451 |
+
},
|
452 |
+
{
|
453 |
+
"file": "data/TEST/DR1/FAKS0/SI943.WAV",
|
454 |
+
"ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
|
455 |
+
"prediction": "pɹɝdəkʃɪnmeɪfɑlfɹbloʊɛkspɛteɪʃɪns",
|
456 |
+
"per": 0.3333333333333333,
|
457 |
+
"pwed": 0.12373737373737376
|
458 |
+
}
|
459 |
+
],
|
460 |
+
"timestamp": "2024-12-18T22:50:59.625872"
|
461 |
+
},
|
462 |
+
{
|
463 |
+
"task_id": "2839c0c6-8f3b-426e-9eb7-04b6e133dc47",
|
464 |
+
"model": "ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa-plus-2000",
|
465 |
+
"subset": "test",
|
466 |
+
"num_files": 1680,
|
467 |
+
"average_per": 0.6479484324708775,
|
468 |
+
"average_pwed": 0.18710002665151734,
|
469 |
+
"detailed_results": [
|
470 |
+
{
|
471 |
+
"file": "data/TEST/DR1/FAKS0/SA1.WAV",
|
472 |
+
"ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
|
473 |
+
"prediction": "ʂixadjodarksyːdɨnɡwisiwaːʃwarɒɔjiːr",
|
474 |
+
"per": 0.6060606060606061,
|
475 |
+
"pwed": 0.15404040404040406
|
476 |
+
},
|
477 |
+
{
|
478 |
+
"file": "data/TEST/DR1/FAKS0/SA2.WAV",
|
479 |
+
"ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
|
480 |
+
"prediction": "dondaːskmiːdɨkɛːɻjɒnojluiʋɻaːɡlɑjɡtaːn",
|
481 |
+
"per": 0.8928571428571429,
|
482 |
+
"pwed": 0.2146464646464646
|
483 |
+
},
|
484 |
+
{
|
485 |
+
"file": "data/TEST/DR1/FAKS0/SI1573.WAV",
|
486 |
+
"ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
|
487 |
+
"prediction": "hizkaːptanustinanhagɛɻdɛnizbiurufubutswuɾʋoːɻninʂaːbi",
|
488 |
+
"per": 0.5106382978723404,
|
489 |
+
"pwed": 0.1096938775510204
|
490 |
+
},
|
491 |
+
{
|
492 |
+
"file": "data/TEST/DR1/FAKS0/SI2203.WAV",
|
493 |
+
"ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
|
494 |
+
"prediction": "ðrisɔnsfrdɔsdaːjvsimtfulɛʂnɛ",
|
495 |
+
"per": 0.5172413793103449,
|
496 |
+
"pwed": 0.11063218390804598
|
497 |
+
},
|
498 |
+
{
|
499 |
+
"file": "data/TEST/DR1/FAKS0/SI943.WAV",
|
500 |
+
"ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
|
501 |
+
"prediction": "pɛdakɕɔnmɛjfaɔfarbuwɔwɛkspɛktajʂɔnt͡s",
|
502 |
+
"per": 0.7272727272727273,
|
503 |
+
"pwed": 0.15
|
504 |
+
}
|
505 |
+
],
|
506 |
+
"timestamp": "2024-12-18T23:29:27.320433"
|
507 |
}
|
508 |
]
|
queue/tasks.json
CHANGED
@@ -88,5 +88,41 @@
|
|
88 |
"github_url": "https://github.com/KoelLabs/ML/",
|
89 |
"status": "completed",
|
90 |
"submitted_at": "2024-12-12T15:53:07.620070"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
}
|
92 |
]
|
|
|
88 |
"github_url": "https://github.com/KoelLabs/ML/",
|
89 |
"status": "completed",
|
90 |
"submitted_at": "2024-12-12T15:53:07.620070"
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"id": "2e592612-ca38-4afb-a6a0-3c870b288960",
|
94 |
+
"model": "snu-nia-12/wav2vec2-large_nia12_phone-ipa_english",
|
95 |
+
"subset": "test",
|
96 |
+
"submission_name": "wav2vec2 ipa eng ",
|
97 |
+
"github_url": "",
|
98 |
+
"status": "completed",
|
99 |
+
"submitted_at": "2024-12-18T21:41:21.861322"
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"id": "ac4cbe86-4dbe-4929-8f76-4d2052e0acf1",
|
103 |
+
"model": "vitouphy/wav2vec2-xls-r-300m-timit-phoneme",
|
104 |
+
"subset": "test",
|
105 |
+
"submission_name": "fine-tuned version of facebook/wav2vec2-xls-r-300m on the Timit dataset",
|
106 |
+
"github_url": "https://www.kaggle.com/code/vitouphy/phoneme-recognition-with-wav2vec2",
|
107 |
+
"status": "processing",
|
108 |
+
"submitted_at": "2024-12-18T22:09:03.412372"
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"id": "d38e65ce-75b5-4dbf-8ade-bff6a5803790",
|
112 |
+
"model": "vitouphy/wav2vec2-xls-r-300m-timit-phoneme",
|
113 |
+
"subset": "test",
|
114 |
+
"submission_name": "fine-tuned version of facebook/wav2vec2-xls-r-300m on the Timit dataset",
|
115 |
+
"github_url": "https://www.kaggle.com/code/vitouphy/phoneme-recognition-with-wav2vec2",
|
116 |
+
"status": "completed",
|
117 |
+
"submitted_at": "2024-12-18T22:19:46.817373"
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"id": "2839c0c6-8f3b-426e-9eb7-04b6e133dc47",
|
121 |
+
"model": "ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa-plus-2000",
|
122 |
+
"subset": "test",
|
123 |
+
"submission_name": "wav2vec2 model",
|
124 |
+
"github_url": "https://huggingface.co/ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa1000-ns",
|
125 |
+
"status": "completed",
|
126 |
+
"submitted_at": "2024-12-18T22:55:36.734691"
|
127 |
}
|
128 |
]
|