Spaces:
Build error
Build error
Pietro Lesci
committed on
Commit
·
8400e75
1
Parent(s):
11bd087
UI and disable preprocessing
Browse files - src/components.py +39 -35
src/components.py
CHANGED
@@ -7,25 +7,25 @@ from src.utils import get_col_indices
|
|
7 |
|
8 |
|
9 |
def form(df):
|
10 |
-
with st.form("
|
11 |
-
col1, col2 = st.columns(
|
|
|
|
|
12 |
with col1:
|
13 |
-
|
14 |
-
cols = [""] + df.columns.tolist()
|
15 |
-
text_index, label_index = get_col_indices(cols)
|
16 |
-
|
17 |
label_column = st.selectbox(
|
18 |
"Select label column",
|
19 |
cols,
|
20 |
index=label_index,
|
21 |
help="Select the column containing the labels",
|
22 |
)
|
|
|
23 |
text_column = st.selectbox(
|
24 |
"Select text column",
|
25 |
cols,
|
26 |
index=text_index,
|
27 |
help="Select the column containing the text",
|
28 |
)
|
|
|
29 |
language = st.selectbox(
|
30 |
"Select language",
|
31 |
[i.name for i in Languages],
|
@@ -35,41 +35,45 @@ def form(df):
|
|
35 |
""",
|
36 |
)
|
37 |
|
38 |
-
with
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
63 |
|
64 |
# Every form must have a submit button.
|
65 |
submitted = st.form_submit_button("Submit")
|
66 |
if submitted:
|
67 |
|
68 |
# preprocess
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
73 |
# prepare input
|
74 |
with st.spinner("Step 2/4: Preparing inputs"):
|
75 |
input_dict = input_transform(df[text_column], df[label_column])
|
|
|
7 |
|
8 |
|
9 |
def form(df):
|
10 |
+
with st.form("Wordify form"):
|
11 |
+
col1, col2, col3 = st.columns(3)
|
12 |
+
cols = [""] + df.columns.tolist()
|
13 |
+
text_index, label_index = get_col_indices(cols)
|
14 |
with col1:
|
|
|
|
|
|
|
|
|
15 |
label_column = st.selectbox(
|
16 |
"Select label column",
|
17 |
cols,
|
18 |
index=label_index,
|
19 |
help="Select the column containing the labels",
|
20 |
)
|
21 |
+
with col2:
|
22 |
text_column = st.selectbox(
|
23 |
"Select text column",
|
24 |
cols,
|
25 |
index=text_index,
|
26 |
help="Select the column containing the text",
|
27 |
)
|
28 |
+
with col3:
|
29 |
language = st.selectbox(
|
30 |
"Select language",
|
31 |
[i.name for i in Languages],
|
|
|
35 |
""",
|
36 |
)
|
37 |
|
38 |
+
with st.expander("Advanced Options"):
|
39 |
+
disable_preprocessing = st.checkbox("Disable Preprocessing", False)
|
40 |
+
|
41 |
+
if not disable_preprocessing:
|
42 |
+
steps_options = list(PreprocessingPipeline.pipeline_components().keys())
|
43 |
+
pre_steps = st.multiselect(
|
44 |
+
"Select pre-lemmatization processing steps (ordered)",
|
45 |
+
options=steps_options,
|
46 |
+
default=[steps_options[i] for i in PreprocessingConfigs.DEFAULT_PRE.value],
|
47 |
+
format_func=lambda x: x.replace("_", " ").title(),
|
48 |
+
help="Select the processing steps to apply before the text is lemmatized",
|
49 |
+
)
|
50 |
+
|
51 |
+
lammatization_options = list(PreprocessingPipeline.lemmatization_component().keys())
|
52 |
+
lemmatization_step = st.selectbox(
|
53 |
+
"Select lemmatization",
|
54 |
+
options=lammatization_options,
|
55 |
+
index=PreprocessingConfigs.DEFAULT_LEMMA.value,
|
56 |
+
help="Select lemmatization procedure",
|
57 |
+
)
|
58 |
+
|
59 |
+
post_steps = st.multiselect(
|
60 |
+
"Select post-lemmatization processing steps (ordered)",
|
61 |
+
options=steps_options,
|
62 |
+
default=[steps_options[i] for i in PreprocessingConfigs.DEFAULT_POST.value],
|
63 |
+
format_func=lambda x: x.replace("_", " ").title(),
|
64 |
+
help="Select the processing steps to apply after the text is lemmatized",
|
65 |
+
)
|
66 |
|
67 |
# Every form must have a submit button.
|
68 |
submitted = st.form_submit_button("Submit")
|
69 |
if submitted:
|
70 |
|
71 |
# preprocess
|
72 |
+
if not disable_preprocessing:
|
73 |
+
with st.spinner("Step 1/4: Preprocessing text"):
|
74 |
+
pipe = PreprocessingPipeline(language, pre_steps, lemmatization_step, post_steps)
|
75 |
+
df = pipe.vaex_process(df, text_column)
|
76 |
+
|
77 |
# prepare input
|
78 |
with st.spinner("Step 2/4: Preparing inputs"):
|
79 |
input_dict = input_transform(df[text_column], df[label_column])
|