theo committed on
Commit
8860d0f
·
1 Parent(s): c4882f0

Fix text inputs for 'other' options: render them in the left column

Browse files
Files changed (1) hide show
  1. tagging_app.py +19 -14
tagging_app.py CHANGED
@@ -5,9 +5,14 @@ from typing import Callable, Dict, List, Tuple
5
  import langcodes as lc
6
  import streamlit as st
7
  import yaml
8
- from datasets.utils.metadata import (DatasetMetadata, known_creators,
9
- known_licenses, known_multilingualities,
10
- known_size_categories, known_task_ids)
 
 
 
 
 
11
 
12
  st.set_page_config(
13
  page_title="HF Dataset Tagging App",
@@ -173,21 +178,21 @@ state["task_categories"] = multiselect(
173
  format_func=lambda tg: f"{tg}: {known_task_ids[tg]['description']}",
174
  )
175
  task_specifics = []
176
- for tg in state["task_categories"]:
177
  specs = multiselect(
178
  leftcol,
179
- f"Specific _{tg}_ tasks",
180
  f"What specific tasks does the dataset support?",
181
- values=[ts for ts in (state["task_ids"] or []) if ts in known_task_ids[tg]["options"]],
182
- valid_set=known_task_ids[tg]["options"],
183
  )
184
  if "other" in specs:
185
- other_task = st.text_input(
186
  "You selected 'other' task. Please enter a short hyphen-separated description for the task:",
187
  value="my-task-description",
188
  )
189
- st.write(f"Registering {tg}-other-{other_task} task")
190
- specs[specs.index("other")] = f"{tg}-other-{other_task}"
191
  task_specifics += specs
192
  state["task_ids"] = task_specifics
193
 
@@ -203,11 +208,11 @@ state["multilinguality"] = multiselect(
203
  )
204
 
205
  if "other" in state["multilinguality"]:
206
- other_multilinguality = st.text_input(
207
  "You selected 'other' type of multilinguality. Please enter a short hyphen-separated description:",
208
  value="my-multilinguality",
209
  )
210
- st.write(f"Registering other-{other_multilinguality} multilinguality")
211
  state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
212
 
213
  valid_values, invalid_values = list(), list()
@@ -286,11 +291,11 @@ if "extended" in state["extended"]:
286
  valid_set=all_dataset_ids + ["other"],
287
  )
288
  if "other" in extended_sources:
289
- other_extended_sources = st.text_input(
290
  "You selected 'other' dataset. Please enter a short hyphen-separated description:",
291
  value="my-dataset",
292
  )
293
- st.write(f"Registering other-{other_extended_sources} dataset")
294
  extended_sources[extended_sources.index("other")] = f"other-{other_extended_sources}"
295
  state["source_datasets"] += [f"extended|{src}" for src in extended_sources]
296
 
 
5
  import langcodes as lc
6
  import streamlit as st
7
  import yaml
8
+ from datasets.utils.metadata import (
9
+ DatasetMetadata,
10
+ known_creators,
11
+ known_licenses,
12
+ known_multilingualities,
13
+ known_size_categories,
14
+ known_task_ids,
15
+ )
16
 
17
  st.set_page_config(
18
  page_title="HF Dataset Tagging App",
 
178
  format_func=lambda tg: f"{tg}: {known_task_ids[tg]['description']}",
179
  )
180
  task_specifics = []
181
+ for task_category in state["task_categories"]:
182
  specs = multiselect(
183
  leftcol,
184
+ f"Specific _{task_category}_ tasks",
185
  f"What specific tasks does the dataset support?",
186
+ values=[ts for ts in (state["task_ids"] or []) if ts in known_task_ids[task_category]["options"]],
187
+ valid_set=known_task_ids[task_category]["options"],
188
  )
189
  if "other" in specs:
190
+ other_task = leftcol.text_input(
191
  "You selected 'other' task. Please enter a short hyphen-separated description for the task:",
192
  value="my-task-description",
193
  )
194
+ leftcol.write(f"Registering {task_category}-other-{other_task} task")
195
+ specs[specs.index("other")] = f"{task_category}-other-{other_task}"
196
  task_specifics += specs
197
  state["task_ids"] = task_specifics
198
 
 
208
  )
209
 
210
  if "other" in state["multilinguality"]:
211
+ other_multilinguality = leftcol.text_input(
212
  "You selected 'other' type of multilinguality. Please enter a short hyphen-separated description:",
213
  value="my-multilinguality",
214
  )
215
+ leftcol.write(f"Registering other-{other_multilinguality} multilinguality")
216
  state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
217
 
218
  valid_values, invalid_values = list(), list()
 
291
  valid_set=all_dataset_ids + ["other"],
292
  )
293
  if "other" in extended_sources:
294
+ other_extended_sources = leftcol.text_input(
295
  "You selected 'other' dataset. Please enter a short hyphen-separated description:",
296
  value="my-dataset",
297
  )
298
+ leftcol.write(f"Registering other-{other_extended_sources} dataset")
299
  extended_sources[extended_sources.index("other")] = f"other-{other_extended_sources}"
300
  state["source_datasets"] += [f"extended|{src}" for src in extended_sources]
301