Pietro Lesci committed on
Commit
8400e75
·
1 Parent(s): 11bd087

UI and disable preprocessing

Browse files
Files changed (1) hide show
  1. src/components.py +39 -35
src/components.py CHANGED
@@ -7,25 +7,25 @@ from src.utils import get_col_indices
7
 
8
 
9
  def form(df):
10
- with st.form("my_form"):
11
- col1, col2 = st.columns([1, 2])
 
 
12
  with col1:
13
-
14
- cols = [""] + df.columns.tolist()
15
- text_index, label_index = get_col_indices(cols)
16
-
17
  label_column = st.selectbox(
18
  "Select label column",
19
  cols,
20
  index=label_index,
21
  help="Select the column containing the labels",
22
  )
 
23
  text_column = st.selectbox(
24
  "Select text column",
25
  cols,
26
  index=text_index,
27
  help="Select the column containing the text",
28
  )
 
29
  language = st.selectbox(
30
  "Select language",
31
  [i.name for i in Languages],
@@ -35,41 +35,45 @@ def form(df):
35
  """,
36
  )
37
 
38
- with col2:
39
- steps_options = list(PreprocessingPipeline.pipeline_components().keys())
40
- pre_steps = st.multiselect(
41
- "Select pre-lemmatization processing steps (ordered)",
42
- options=steps_options,
43
- default=[steps_options[i] for i in PreprocessingConfigs.DEFAULT_PRE.value],
44
- format_func=lambda x: x.replace("_", " ").title(),
45
- help="Select the processing steps to apply before the text is lemmatized",
46
- )
47
-
48
- lammatization_options = list(PreprocessingPipeline.lemmatization_component().keys())
49
- lemmatization_step = st.selectbox(
50
- "Select lemmatization",
51
- options=lammatization_options,
52
- index=PreprocessingConfigs.DEFAULT_LEMMA.value,
53
- help="Select lemmatization procedure",
54
- )
55
-
56
- post_steps = st.multiselect(
57
- "Select post-lemmatization processing steps (ordered)",
58
- options=steps_options,
59
- default=[steps_options[i] for i in PreprocessingConfigs.DEFAULT_POST.value],
60
- format_func=lambda x: x.replace("_", " ").title(),
61
- help="Select the processing steps to apply after the text is lemmatized",
62
- )
 
 
 
63
 
64
  # Every form must have a submit button.
65
  submitted = st.form_submit_button("Submit")
66
  if submitted:
67
 
68
  # preprocess
69
- with st.spinner("Step 1/4: Preprocessing text"):
70
- pipe = PreprocessingPipeline(language, pre_steps, lemmatization_step, post_steps)
71
- df = pipe.vaex_process(df, text_column)
72
-
 
73
  # prepare input
74
  with st.spinner("Step 2/4: Preparing inputs"):
75
  input_dict = input_transform(df[text_column], df[label_column])
 
7
 
8
 
9
  def form(df):
10
+ with st.form("Wordify form"):
11
+ col1, col2, col3 = st.columns(3)
12
+ cols = [""] + df.columns.tolist()
13
+ text_index, label_index = get_col_indices(cols)
14
  with col1:
 
 
 
 
15
  label_column = st.selectbox(
16
  "Select label column",
17
  cols,
18
  index=label_index,
19
  help="Select the column containing the labels",
20
  )
21
+ with col2:
22
  text_column = st.selectbox(
23
  "Select text column",
24
  cols,
25
  index=text_index,
26
  help="Select the column containing the text",
27
  )
28
+ with col3:
29
  language = st.selectbox(
30
  "Select language",
31
  [i.name for i in Languages],
 
35
  """,
36
  )
37
 
38
+ with st.expander("Advanced Options"):
39
+ disable_preprocessing = st.checkbox("Disable Preprocessing", False)
40
+
41
+ if not disable_preprocessing:
42
+ steps_options = list(PreprocessingPipeline.pipeline_components().keys())
43
+ pre_steps = st.multiselect(
44
+ "Select pre-lemmatization processing steps (ordered)",
45
+ options=steps_options,
46
+ default=[steps_options[i] for i in PreprocessingConfigs.DEFAULT_PRE.value],
47
+ format_func=lambda x: x.replace("_", " ").title(),
48
+ help="Select the processing steps to apply before the text is lemmatized",
49
+ )
50
+
51
+ lammatization_options = list(PreprocessingPipeline.lemmatization_component().keys())
52
+ lemmatization_step = st.selectbox(
53
+ "Select lemmatization",
54
+ options=lammatization_options,
55
+ index=PreprocessingConfigs.DEFAULT_LEMMA.value,
56
+ help="Select lemmatization procedure",
57
+ )
58
+
59
+ post_steps = st.multiselect(
60
+ "Select post-lemmatization processing steps (ordered)",
61
+ options=steps_options,
62
+ default=[steps_options[i] for i in PreprocessingConfigs.DEFAULT_POST.value],
63
+ format_func=lambda x: x.replace("_", " ").title(),
64
+ help="Select the processing steps to apply after the text is lemmatized",
65
+ )
66
 
67
  # Every form must have a submit button.
68
  submitted = st.form_submit_button("Submit")
69
  if submitted:
70
 
71
  # preprocess
72
+ if not disable_preprocessing:
73
+ with st.spinner("Step 1/4: Preprocessing text"):
74
+ pipe = PreprocessingPipeline(language, pre_steps, lemmatization_step, post_steps)
75
+ df = pipe.vaex_process(df, text_column)
76
+
77
  # prepare input
78
  with st.spinner("Step 2/4: Preparing inputs"):
79
  input_dict = input_transform(df[text_column], df[label_column])