arunasrivastava committed on
Commit
03346c0
·
1 Parent(s): 7d25ccc

updating descriptions

Browse files
__pycache__/main.cpython-310.pyc CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
 
app.py CHANGED
@@ -135,7 +135,7 @@ with gr.Blocks(css="""
135
  box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
136
  }
137
  .styled-table thead tr {
138
- background-color: #96b9D0;
139
  color: #ffffff;
140
  text-align: left;
141
  }
@@ -157,22 +157,23 @@ with gr.Blocks(css="""
157
  ## Test Set Information
158
  The test set used for evaluation is from the [TIMIT speech corpus](https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech). The TIMIT corpus is a widely used dataset for speech recognition research.
159
 
160
- ## Processing Time
161
- Please note that processing will take around 2 minutes.
162
  """)
163
  with gr.Tabs():
164
  with gr.TabItem("🏆 Leaderboard"):
165
  leaderboard_html = gr.HTML(create_html_table(format_leaderboard_df(load_leaderboard_data())))
166
  refresh_btn = gr.Button("🔄 Refresh")
167
  refresh_btn.click(
168
- lambda: gr.HTML.update(value=create_html_table(format_leaderboard_df(load_leaderboard_data()))),
169
  outputs=leaderboard_html
170
  )
171
 
 
172
  with gr.TabItem("📝 Submit Model"):
173
  model_name = gr.Textbox(label="Model Name", placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft")
174
  submission_name = gr.Textbox(label="Submission Name", placeholder="My Model v1.0")
175
- github_url = gr.Textbox(label="GitHub URL (optional)", placeholder="https://github.com/username/repo")
176
  submit_btn = gr.Button("Submit")
177
  result = gr.Textbox(label="Submission Status")
178
 
 
135
  box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
136
  }
137
  .styled-table thead tr {
138
+ background-color: #004999;
139
  color: #ffffff;
140
  text-align: left;
141
  }
 
157
  ## Test Set Information
158
  The test set used for evaluation is from the [TIMIT speech corpus](https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech). The TIMIT corpus is a widely used dataset for speech recognition research.
159
 
160
+ ## Compute
161
+ This leaderboard uses the free basic plan (16GB RAM, 2vCPUs). The evaluation may take several hours to complete. Please be patient and do not submit the same model multiple times.
162
  """)
163
  with gr.Tabs():
164
  with gr.TabItem("🏆 Leaderboard"):
165
  leaderboard_html = gr.HTML(create_html_table(format_leaderboard_df(load_leaderboard_data())))
166
  refresh_btn = gr.Button("🔄 Refresh")
167
  refresh_btn.click(
168
+ lambda: create_html_table(format_leaderboard_df(load_leaderboard_data())),
169
  outputs=leaderboard_html
170
  )
171
 
172
+
173
  with gr.TabItem("📝 Submit Model"):
174
  model_name = gr.Textbox(label="Model Name", placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft")
175
  submission_name = gr.Textbox(label="Submission Name", placeholder="My Model v1.0")
176
+ github_url = gr.Textbox(label="Github/Kaggle/HF URL (optional)", placeholder="https://github.com/username/repo")
177
  submit_btn = gr.Button("Submit")
178
  result = gr.Textbox(label="Submission Status")
179
 
main.py CHANGED
@@ -488,7 +488,7 @@ async def get_leaderboard():
488
  """Get current leaderboard"""
489
  try:
490
  leaderboard = storage_manager.load('leaderboard')
491
- sorted_leaderboard = sorted(leaderboard, key=lambda x: (x["average_per"], x["average_pwed"]))
492
  return sorted_leaderboard
493
  except Exception as e:
494
  print(f"Error loading leaderboard: {e}")
 
488
  """Get current leaderboard"""
489
  try:
490
  leaderboard = storage_manager.load('leaderboard')
491
+ sorted_leaderboard = sorted(leaderboard, key=lambda x: (x["average_pwed"], x["average_per"]))
492
  return sorted_leaderboard
493
  except Exception as e:
494
  print(f"Error loading leaderboard: {e}")
queue/leaderboard.json CHANGED
@@ -68,5 +68,35 @@
68
  "subset": "test",
69
  "github_url": "https://github.com/KoelLabs/ML/",
70
  "submission_date": "2024-12-12T16:07:25.391145"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  }
72
  ]
 
68
  "subset": "test",
69
  "github_url": "https://github.com/KoelLabs/ML/",
70
  "submission_date": "2024-12-12T16:07:25.391145"
71
+ },
72
+ {
73
+ "submission_id": "02f223d4-7b98-4613-9377-19b74defe308",
74
+ "submission_name": "wav2vec2 ipa eng ",
75
+ "model": "snu-nia-12/wav2vec2-large_nia12_phone-ipa_english",
76
+ "average_per": 0.4847029843149011,
77
+ "average_pwed": 0.2072006544586948,
78
+ "subset": "test",
79
+ "github_url": null,
80
+ "submission_date": "2024-12-18T22:01:20.855881"
81
+ },
82
+ {
83
+ "submission_id": "bed08468-42c7-459f-a46d-49ead50abfbc",
84
+ "submission_name": "fine-tuned version of facebook/wav2vec2-xls-r-300m on the Timit dataset",
85
+ "model": "vitouphy/wav2vec2-xls-r-300m-timit-phoneme",
86
+ "average_per": 0.2561961414705681,
87
+ "average_pwed": 0.1378394393452702,
88
+ "subset": "test",
89
+ "github_url": "https://www.kaggle.com/code/vitouphy/phoneme-recognition-with-wav2vec2",
90
+ "submission_date": "2024-12-18T22:50:59.627338"
91
+ },
92
+ {
93
+ "submission_id": "4086072e-9368-442f-97cd-1fda6bf6656e",
94
+ "submission_name": "wav2vec2 model",
95
+ "model": "ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa-plus-2000",
96
+ "average_per": 0.6479484324708775,
97
+ "average_pwed": 0.18710002665151734,
98
+ "subset": "test",
99
+ "github_url": "https://huggingface.co/ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa1000-ns",
100
+ "submission_date": "2024-12-18T23:29:27.322286"
101
  }
102
  ]
queue/results.json CHANGED
@@ -366,5 +366,143 @@
366
  }
367
  ],
368
  "timestamp": "2024-12-12T16:07:25.389475"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  }
370
  ]
 
366
  }
367
  ],
368
  "timestamp": "2024-12-12T16:07:25.389475"
369
+ },
370
+ {
371
+ "task_id": "2e592612-ca38-4afb-a6a0-3c870b288960",
372
+ "model": "snu-nia-12/wav2vec2-large_nia12_phone-ipa_english",
373
+ "subset": "test",
374
+ "num_files": 1680,
375
+ "average_per": 0.4847029843149011,
376
+ "average_pwed": 0.2072006544586948,
377
+ "detailed_results": [
378
+ {
379
+ "file": "data/TEST/DR1/FAKS0/SA1.WAV",
380
+ "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
381
+ "prediction": "ʃihædjʊrdɑrksutɪngrisiwɑʃwɔtərɔljɪrər",
382
+ "per": 0.42424242424242425,
383
+ "pwed": 0.15393518518518517
384
+ },
385
+ {
386
+ "file": "data/TEST/DR1/FAKS0/SA2.WAV",
387
+ "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
388
+ "prediction": "doʊntæskmitɪkɛriənɔɪliræglaɪkðətdoʊndt",
389
+ "per": 0.5,
390
+ "pwed": 0.2623873873873874
391
+ },
392
+ {
393
+ "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
394
+ "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
395
+ "prediction": "hɪzkæptənwɑzθɪnəndhægərdəndhɪzbjutəfəlbutswərwɔrnəndʃæbiiii",
396
+ "per": 0.46808510638297873,
397
+ "pwed": 0.2191091954022989
398
+ },
399
+ {
400
+ "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
401
+ "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
402
+ "prediction": "ðərizənzfərðɪstaɪvsimdfulɪʃnaʊ",
403
+ "per": 0.20689655172413793,
404
+ "pwed": 0.054166666666666675
405
+ },
406
+ {
407
+ "file": "data/TEST/DR1/FAKS0/SI943.WAV",
408
+ "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
409
+ "prediction": "prədəkʃənmeɪfɔlfɑrbɪloʊɛkspɛkteɪʃənzpzppppzpdtdtd",
410
+ "per": 0.7272727272727273,
411
+ "pwed": 0.34438775510204084
412
+ }
413
+ ],
414
+ "timestamp": "2024-12-18T22:01:20.853274"
415
+ },
416
+ {
417
+ "task_id": "d38e65ce-75b5-4dbf-8ade-bff6a5803790",
418
+ "model": "vitouphy/wav2vec2-xls-r-300m-timit-phoneme",
419
+ "subset": "test",
420
+ "num_files": 1680,
421
+ "average_per": 0.2561961414705681,
422
+ "average_pwed": 0.1378394393452702,
423
+ "detailed_results": [
424
+ {
425
+ "file": "data/TEST/DR1/FAKS0/SA1.WAV",
426
+ "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
427
+ "prediction": "ʃihædjɝdɑɹksuɾɪngɹisiwɑʃwɑɾɝɑljiɝ",
428
+ "per": 0.18181818181818182,
429
+ "pwed": 0.13257575757575757
430
+ },
431
+ {
432
+ "file": "data/TEST/DR1/FAKS0/SA2.WAV",
433
+ "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
434
+ "prediction": "doʊnæskmitɪkæɹiɪnɔɪliɹæglaɪkðæ",
435
+ "per": 0.21428571428571427,
436
+ "pwed": 0.10919540229885057
437
+ },
438
+ {
439
+ "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
440
+ "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
441
+ "prediction": "hɪzkætɪnwəsθɪnənhægɝdɪnɪzbjuɾɪflbutswɝwɑɹnɪnʃæbi",
442
+ "per": 0.19148936170212766,
443
+ "pwed": 0.0576241134751773
444
+ },
445
+ {
446
+ "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
447
+ "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
448
+ "prediction": "ðɪɹizənzfɝðɪsdaɪvsimdfulɪʃnaʊ",
449
+ "per": 0.10344827586206896,
450
+ "pwed": 0.03735632183908046
451
+ },
452
+ {
453
+ "file": "data/TEST/DR1/FAKS0/SI943.WAV",
454
+ "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
455
+ "prediction": "pɹɝdəkʃɪnmeɪfɑlfɹbloʊɛkspɛteɪʃɪns",
456
+ "per": 0.3333333333333333,
457
+ "pwed": 0.12373737373737376
458
+ }
459
+ ],
460
+ "timestamp": "2024-12-18T22:50:59.625872"
461
+ },
462
+ {
463
+ "task_id": "2839c0c6-8f3b-426e-9eb7-04b6e133dc47",
464
+ "model": "ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa-plus-2000",
465
+ "subset": "test",
466
+ "num_files": 1680,
467
+ "average_per": 0.6479484324708775,
468
+ "average_pwed": 0.18710002665151734,
469
+ "detailed_results": [
470
+ {
471
+ "file": "data/TEST/DR1/FAKS0/SA1.WAV",
472
+ "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
473
+ "prediction": "ʂixadjodarksyːdɨnɡwisiwaːʃwarɒɔjiːr",
474
+ "per": 0.6060606060606061,
475
+ "pwed": 0.15404040404040406
476
+ },
477
+ {
478
+ "file": "data/TEST/DR1/FAKS0/SA2.WAV",
479
+ "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
480
+ "prediction": "dondaːskmiːdɨkɛːɻjɒnojluiʋɻaːɡlɑjɡtaːn",
481
+ "per": 0.8928571428571429,
482
+ "pwed": 0.2146464646464646
483
+ },
484
+ {
485
+ "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
486
+ "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
487
+ "prediction": "hizkaːptanustinanhagɛɻdɛnizbiurufubutswuɾʋoːɻninʂaːbi",
488
+ "per": 0.5106382978723404,
489
+ "pwed": 0.1096938775510204
490
+ },
491
+ {
492
+ "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
493
+ "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
494
+ "prediction": "ðrisɔnsfrdɔsdaːjvsimtfulɛʂnɛ",
495
+ "per": 0.5172413793103449,
496
+ "pwed": 0.11063218390804598
497
+ },
498
+ {
499
+ "file": "data/TEST/DR1/FAKS0/SI943.WAV",
500
+ "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
501
+ "prediction": "pɛdakɕɔnmɛjfaɔfarbuwɔwɛkspɛktajʂɔnt͡s",
502
+ "per": 0.7272727272727273,
503
+ "pwed": 0.15
504
+ }
505
+ ],
506
+ "timestamp": "2024-12-18T23:29:27.320433"
507
  }
508
  ]
queue/tasks.json CHANGED
@@ -88,5 +88,41 @@
88
  "github_url": "https://github.com/KoelLabs/ML/",
89
  "status": "completed",
90
  "submitted_at": "2024-12-12T15:53:07.620070"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  }
92
  ]
 
88
  "github_url": "https://github.com/KoelLabs/ML/",
89
  "status": "completed",
90
  "submitted_at": "2024-12-12T15:53:07.620070"
91
+ },
92
+ {
93
+ "id": "2e592612-ca38-4afb-a6a0-3c870b288960",
94
+ "model": "snu-nia-12/wav2vec2-large_nia12_phone-ipa_english",
95
+ "subset": "test",
96
+ "submission_name": "wav2vec2 ipa eng ",
97
+ "github_url": "",
98
+ "status": "completed",
99
+ "submitted_at": "2024-12-18T21:41:21.861322"
100
+ },
101
+ {
102
+ "id": "ac4cbe86-4dbe-4929-8f76-4d2052e0acf1",
103
+ "model": "vitouphy/wav2vec2-xls-r-300m-timit-phoneme",
104
+ "subset": "test",
105
+ "submission_name": "fine-tuned version of facebook/wav2vec2-xls-r-300m on the Timit dataset",
106
+ "github_url": "https://www.kaggle.com/code/vitouphy/phoneme-recognition-with-wav2vec2",
107
+ "status": "processing",
108
+ "submitted_at": "2024-12-18T22:09:03.412372"
109
+ },
110
+ {
111
+ "id": "d38e65ce-75b5-4dbf-8ade-bff6a5803790",
112
+ "model": "vitouphy/wav2vec2-xls-r-300m-timit-phoneme",
113
+ "subset": "test",
114
+ "submission_name": "fine-tuned version of facebook/wav2vec2-xls-r-300m on the Timit dataset",
115
+ "github_url": "https://www.kaggle.com/code/vitouphy/phoneme-recognition-with-wav2vec2",
116
+ "status": "completed",
117
+ "submitted_at": "2024-12-18T22:19:46.817373"
118
+ },
119
+ {
120
+ "id": "2839c0c6-8f3b-426e-9eb7-04b6e133dc47",
121
+ "model": "ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa-plus-2000",
122
+ "subset": "test",
123
+ "submission_name": "wav2vec2 model",
124
+ "github_url": "https://huggingface.co/ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa1000-ns",
125
+ "status": "completed",
126
+ "submitted_at": "2024-12-18T22:55:36.734691"
127
  }
128
  ]