Pietro Lesci committed on
Commit
e330a04
·
1 Parent(s): fdbadfe

add processing step doc

Browse files
Files changed (3) hide show
  1. app.py +2 -1
  2. src/components.py +18 -0
  3. src/preprocessing.py +2 -0
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
 
3
- from src.components import faq, footer, form, presentation, analysis
4
  from src.utils import convert_df, get_logo, read_file
5
 
6
  # app configs
@@ -42,6 +42,7 @@ if not uploaded_fl:
42
  else:
43
  df = read_file(uploaded_fl)
44
  outputs = form(df)
 
45
 
46
  # change or create session state
47
  if outputs is not None or "outputs" not in st.session_state:
 
1
  import streamlit as st
2
 
3
+ from src.components import faq, footer, form, presentation, analysis, docs
4
  from src.utils import convert_df, get_logo, read_file
5
 
6
  # app configs
 
42
  else:
43
  df = read_file(uploaded_fl)
44
  outputs = form(df)
45
+ docs()
46
 
47
  # change or create session state
48
  if outputs is not None or "outputs" not in st.session_state:
src/components.py CHANGED
@@ -8,6 +8,23 @@ from src.wordifier import input_transform, output_transform, wordifier
8
  from src.utils import get_col_indices
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def form(df):
12
  st.subheader("Parameters")
13
  with st.form("Wordify form"):
@@ -43,6 +60,7 @@ def form(df):
43
 
44
  if not disable_preprocessing:
45
  steps_options = list(PreprocessingPipeline.pipeline_components().keys())
 
46
  pre_steps = st.multiselect(
47
  "Select pre-lemmatization processing steps (ordered)",
48
  options=steps_options,
 
8
  from src.utils import get_col_indices
9
 
10
 
11
+ def docs():
12
+ steps_options = list(PreprocessingPipeline.pipeline_components().keys())
13
+
14
+ with st.expander("Documentation for the Advanced Options"):
15
+ component_name = st.selectbox(
16
+ "Select a processing step to see docs",
17
+ options=[""] + steps_options,
18
+ index=1,
19
+ format_func=lambda x: x.replace("_", " ").title(),
20
+ help="Select a processing step to see the relative documentation",
21
+ )
22
+
23
+ pipe_component = PreprocessingPipeline.pipeline_components().get(component_name)
24
+ if pipe_component is not None:
25
+ st.help(pipe_component)
26
+
27
+
28
  def form(df):
29
  st.subheader("Parameters")
30
  with st.form("Wordify form"):
 
60
 
61
  if not disable_preprocessing:
62
  steps_options = list(PreprocessingPipeline.pipeline_components().keys())
63
+
64
  pre_steps = st.multiselect(
65
  "Select pre-lemmatization processing steps (ordered)",
66
  options=steps_options,
src/preprocessing.py CHANGED
@@ -25,6 +25,7 @@ def normalize_acronyms(t: str) -> str:
25
 
26
  _re_non_word = re.compile(r"\W")
27
  def remove_non_word(t: str) -> str:
 
28
  return _re_non_word.sub(" ", t)
29
 
30
 
@@ -52,6 +53,7 @@ def normalize_repeating_words(t: str) -> str:
52
 
53
 
54
  def lowercase(t: str) -> str:
 
55
  return t.lower()
56
 
57
 
 
25
 
26
  _re_non_word = re.compile(r"\W")
27
  def remove_non_word(t: str) -> str:
28
+ "Removes non-words characters from the text using the regex `\W`"
29
  return _re_non_word.sub(" ", t)
30
 
31
 
 
53
 
54
 
55
  def lowercase(t: str) -> str:
56
+ "Lowercases the text"
57
  return t.lower()
58
 
59