Spaces:
Running
Running
theo
committed on
Commit
·
1cc3978
1
Parent(s):
326ad7e
add a validator input
Browse files
- requirements.txt +1 -0
- tagging_app.py +29 -19
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
pyyaml
|
2 |
datasets
|
3 |
streamlit
|
|
|
|
1 |
pyyaml
|
2 |
datasets
|
3 |
streamlit
|
4 |
+
langcodes[data]
|
tagging_app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import json
|
2 |
from pathlib import Path
|
3 |
-
from typing import Callable, List, Tuple
|
4 |
|
5 |
import streamlit as st
|
6 |
import yaml
|
@@ -85,6 +85,21 @@ def multiselect(
|
|
85 |
return w.multiselect(markdown, valid_set, default=valid_values, format_func=format_func)
|
86 |
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
def new_state():
|
89 |
return {
|
90 |
"task_categories": [],
|
@@ -155,17 +170,7 @@ if rightbtn.button("flush state"):
|
|
155 |
st.experimental_set_query_params()
|
156 |
|
157 |
if preloaded_id is not None and initial_state is not None:
|
158 |
-
|
159 |
-
DatasetMetadata(**initial_state)
|
160 |
-
valid = "✔️ This is a valid tagset!"
|
161 |
-
except Exception as e:
|
162 |
-
valid = f"""
|
163 |
-
π This is an invalid tagset, here are the errors in it:
|
164 |
-
```
|
165 |
-
{e}
|
166 |
-
```
|
167 |
-
You're _very_ welcome to fix these issues and submit a new PR on [`datasets`](https://github.com/huggingface/datasets/)
|
168 |
-
"""
|
169 |
st.sidebar.markdown(
|
170 |
f"""
|
171 |
---
|
@@ -323,13 +328,8 @@ state["size_categories"] = [
|
|
323 |
########################
|
324 |
## Show results
|
325 |
########################
|
326 |
-
|
327 |
-
|
328 |
-
valid = "✅ Validated! Copy it into your dataset's `README.md` header! 🤗 "
|
329 |
-
except Exception as e:
|
330 |
-
valid = f"""π Could not validate:
|
331 |
-
```{e}```
|
332 |
-
"""
|
333 |
rightcol.markdown(
|
334 |
f"""
|
335 |
### Finalized tag set
|
@@ -339,5 +339,15 @@ rightcol.markdown(
|
|
339 |
```yaml
|
340 |
{yaml.dump(state)}
|
341 |
```
|
|
|
|
|
|
|
|
|
342 |
""",
|
343 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import json
|
2 |
from pathlib import Path
|
3 |
+
from typing import Callable, Dict, List, Tuple
|
4 |
|
5 |
import streamlit as st
|
6 |
import yaml
|
|
|
85 |
return w.multiselect(markdown, valid_set, default=valid_values, format_func=format_func)
|
86 |
|
87 |
|
88 |
+
def validate_dict(state_dict: Dict) -> str:
|
89 |
+
try:
|
90 |
+
DatasetMetadata(**state_dict)
|
91 |
+
valid = "✔️ This is a valid tagset! 🤗"
|
92 |
+
except Exception as e:
|
93 |
+
valid = f"""
|
94 |
+
π This is an invalid tagset, here are the errors in it:
|
95 |
+
```
|
96 |
+
{e}
|
97 |
+
```
|
98 |
+
You're _very_ welcome to fix these issues and submit a new PR on [`datasets`](https://github.com/huggingface/datasets/)
|
99 |
+
"""
|
100 |
+
return valid
|
101 |
+
|
102 |
+
|
103 |
def new_state():
|
104 |
return {
|
105 |
"task_categories": [],
|
|
|
170 |
st.experimental_set_query_params()
|
171 |
|
172 |
if preloaded_id is not None and initial_state is not None:
|
173 |
+
valid = validate_dict(initial_state)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
st.sidebar.markdown(
|
175 |
f"""
|
176 |
---
|
|
|
328 |
########################
|
329 |
## Show results
|
330 |
########################
|
331 |
+
|
332 |
+
valid = validate_dict(state)
|
|
|
|
|
|
|
|
|
|
|
333 |
rightcol.markdown(
|
334 |
f"""
|
335 |
### Finalized tag set
|
|
|
339 |
```yaml
|
340 |
{yaml.dump(state)}
|
341 |
```
|
342 |
+
---
|
343 |
+
#### Arbitrary yaml validator
|
344 |
+
|
345 |
+
This is a standalone tool, it is useful to check for errors on an existing tagset or modifying directly the text rather than the UI on the left.
|
346 |
""",
|
347 |
)
|
348 |
+
|
349 |
+
yamlblock = rightcol.text_area("Input your yaml here")
|
350 |
+
if yamlblock.strip() != "":
|
351 |
+
inputdict = yaml.safe_load(yamlblock)
|
352 |
+
valid = validate_dict(inputdict)
|
353 |
+
rightcol.markdown(valid)
|