Spaces:
Sleeping
Sleeping
Sasha
committed on
Commit
·
20aa046
1
Parent(s):
d8eab79
adding little fixes
Browse files
app.py
CHANGED
@@ -23,11 +23,25 @@ top_datasets= ['glue', 'super_glue', 'wikitext', 'imdb', 'squad', 'squad_es', \
|
|
23 |
'sick', 'xsum', 'wikiann', 'yelp_polarity', 'hellaswag', 'piqa', \
|
24 |
'race', 'winogrande']
|
25 |
|
26 |
-
tasks= ['
|
27 |
-
'
|
28 |
-
'reading comprehension', 'paraphrase identification', 'natural language understanding'
|
|
|
29 |
metrics= ['matthews_correlation', 'perplexity', 'meteor', 'code_eval', 'super_glue', 'rouge', 'mauve', 'cer', 'accuracy', 'recall', 'bleurt', 'sari', 'precision', 'mean_iou', 'squad', 'mahalanobis', 'chrf', 'mae', 'squad_v2', 'seqeval', 'cuad', 'wiki_split', 'google_bleu', 'competition_math', 'pearsonr', 'xtreme_s', 'comet', 'gleu', 'spearmanr', 'f1', 'frugalscore', 'bertscore', 'indic_glue', 'mse', 'xnli', 'ter', 'coval', 'wer', 'bleu', 'glue', 'sacrebleu']
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
with st.sidebar.expander("Datasets", expanded=True):
|
32 |
dataset_name = st.selectbox(
|
33 |
f"Choose a dataset to evaluate on:",
|
@@ -59,25 +73,21 @@ st.markdown("For more information about this dataset, check out [its website](ht
|
|
59 |
st.markdown("### Dataset-Specific Metrics")
|
60 |
if dataset_name in metrics:
|
61 |
st.markdown("Great news! Your dataset has a dedicated metric for it! You can use it like this: :point_down:")
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
else:
|
67 |
st.markdown("Your dataset doesn't have a dedicated metric, but that's ok! :wink:")
|
68 |
dedicated_metric = False
|
69 |
|
70 |
st.markdown("### Task-Specific Metrics")
|
71 |
|
72 |
-
task =
|
73 |
-
try:
|
74 |
-
task = dataset_builder.info.task_templates[0].task
|
75 |
-
except:
|
76 |
-
for t in tasks:
|
77 |
-
if t in str(dataset_builder.info.description).lower():
|
78 |
-
task = t
|
79 |
-
else:
|
80 |
-
continue
|
81 |
|
82 |
if task is not None:
|
83 |
st.markdown("The task associated to it your dataset is: " + task.replace('-',' '))
|
|
|
23 |
'sick', 'xsum', 'wikiann', 'yelp_polarity', 'hellaswag', 'piqa', \
|
24 |
'race', 'winogrande']
|
25 |
|
26 |
+
tasks= ['classification', 'question answering', 'automatic speech recognition', 'natural language inference', \
|
27 |
+
'translation', 'sentiment analysis', 'text simplification', 'named entity recognition', \
|
28 |
+
'reading comprehension', 'paraphrase identification', 'natural language understanding',\
|
29 |
+
'textual entailment', 'commonsense reasoning', 'summarization']
|
30 |
metrics= ['matthews_correlation', 'perplexity', 'meteor', 'code_eval', 'super_glue', 'rouge', 'mauve', 'cer', 'accuracy', 'recall', 'bleurt', 'sari', 'precision', 'mean_iou', 'squad', 'mahalanobis', 'chrf', 'mae', 'squad_v2', 'seqeval', 'cuad', 'wiki_split', 'google_bleu', 'competition_math', 'pearsonr', 'xtreme_s', 'comet', 'gleu', 'spearmanr', 'f1', 'frugalscore', 'bertscore', 'indic_glue', 'mse', 'xnli', 'ter', 'coval', 'wer', 'bleu', 'glue', 'sacrebleu']
|
31 |
|
32 |
+
def find_task(dname):
    """Return the task associated with the dataset ``dname``, or ``None``.

    The dataset builder is loaded for ``dname`` together with the
    module-level ``dataset_config``. The task of the builder's first task
    template is returned when it is available; otherwise the dataset
    description is searched (case-insensitively) for each entry of the
    module-level ``tasks`` list.

    Args:
        dname: Name of the dataset to look up.

    Returns:
        The task string, or ``None`` when there is no usable task template
        and no known task name occurs in the description.
    """
    # Use the argument rather than the global ``dataset_name`` so the
    # function honours whatever dataset the caller asks about.
    dataset_builder = load_dataset_builder(dname, dataset_config)

    try:
        return dataset_builder.info.task_templates[0].task
    except (AttributeError, IndexError, TypeError):
        # No task templates (missing, None, or empty): fall back to
        # scanning the description below.
        pass

    # Lowercase once instead of on every loop iteration.
    description = str(dataset_builder.info.description).lower()

    task = None
    for candidate in tasks:
        # No early exit: when several task names occur in the description,
        # the last one listed in ``tasks`` wins, as before.
        if candidate in description:
            task = candidate
    return task
|
44 |
+
|
45 |
with st.sidebar.expander("Datasets", expanded=True):
|
46 |
dataset_name = st.selectbox(
|
47 |
f"Choose a dataset to evaluate on:",
|
|
|
73 |
st.markdown("### Dataset-Specific Metrics")
|
74 |
if dataset_name in metrics:
|
75 |
st.markdown("Great news! Your dataset has a dedicated metric for it! You can use it like this: :point_down:")
|
76 |
+
if "glue" in dataset_name:
|
77 |
+
code = ''' from datasets import load_metric
|
78 |
+
metric = load_metric(\"'''+dataset_name+'''\", \"'''+dataset_config+'''\")'''
|
79 |
+
st.code(code, language='python')
|
80 |
+
else:
|
81 |
+
code = ''' from datasets import load_metric
|
82 |
+
metric = load_metric(\"'''+dataset_name+'''\")'''
|
83 |
+
st.code(code, language='python')
|
84 |
else:
|
85 |
st.markdown("Your dataset doesn't have a dedicated metric, but that's ok! :wink:")
|
86 |
dedicated_metric = False
|
87 |
|
88 |
st.markdown("### Task-Specific Metrics")
|
89 |
|
90 |
+
task = find_task(dataset_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
if task is not None:
|
93 |
st.markdown("The task associated to it your dataset is: " + task.replace('-',' '))
|