ugmSorcero committed on
Commit
dbcf2e8
·
1 Parent(s): 46323da
Files changed (2) hide show
  1. interface/components.py +7 -5
  2. interface/utils.py +4 -2
interface/components.py CHANGED
@@ -80,11 +80,11 @@ def component_article_url(container):
80
  st.markdown("---")
81
  else:
82
  break
83
-
84
  for idx, doc in enumerate(urls):
85
  with st.expander(f"Preview URL {idx}"):
86
  st.write(doc)
87
-
88
  corpus = [
89
  {"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(urls)
90
  ]
@@ -98,7 +98,9 @@ def component_file_input(container):
98
  doc_id = 1
99
  with st.expander("Enter Files"):
100
  while True:
101
- file = st.file_uploader("Upload a .txt, .pdf, .csv, image file", key=doc_id)
 
 
102
  if file != None:
103
  extracted_text = extract_text_from_file(file)
104
  if extracted_text != None:
@@ -109,11 +111,11 @@ def component_file_input(container):
109
  break
110
  else:
111
  break
112
-
113
  for idx, doc in enumerate(files):
114
  with st.expander(f"Preview File {idx}"):
115
  st.write(doc)
116
-
117
  corpus = [
118
  {"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(files)
119
  ]
 
80
  st.markdown("---")
81
  else:
82
  break
83
+
84
  for idx, doc in enumerate(urls):
85
  with st.expander(f"Preview URL {idx}"):
86
  st.write(doc)
87
+
88
  corpus = [
89
  {"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(urls)
90
  ]
 
98
  doc_id = 1
99
  with st.expander("Enter Files"):
100
  while True:
101
+ file = st.file_uploader(
102
+ "Upload a .txt, .pdf, .csv, image file", key=doc_id
103
+ )
104
  if file != None:
105
  extracted_text = extract_text_from_file(file)
106
  if extracted_text != None:
 
111
  break
112
  else:
113
  break
114
+
115
  for idx, doc in enumerate(files):
116
  with st.expander(f"Preview File {idx}"):
117
  st.write(doc)
118
+
119
  corpus = [
120
  {"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(files)
121
  ]
interface/utils.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
8
  import pytesseract
9
  from PIL import Image
10
 
 
11
  def get_pipelines():
12
  pipeline_names, pipeline_funcs = list(
13
  zip(*getmembers(pipelines_functions, isfunction))
@@ -26,6 +27,7 @@ def extract_text_from_url(url: str):
26
 
27
  return article.text
28
 
 
29
  @st.experimental_memo
30
  def extract_text_from_file(file):
31
  # read text file
@@ -77,9 +79,9 @@ def extract_text_from_file(file):
77
  continue
78
  file_text += " " + txt
79
  return file_text
80
-
81
  # read image file (OCR)
82
- elif file.type == 'image/jpeg':
83
  return pytesseract.image_to_string(Image.open(file))
84
 
85
  else:
 
8
  import pytesseract
9
  from PIL import Image
10
 
11
+
12
  def get_pipelines():
13
  pipeline_names, pipeline_funcs = list(
14
  zip(*getmembers(pipelines_functions, isfunction))
 
27
 
28
  return article.text
29
 
30
+
31
  @st.experimental_memo
32
  def extract_text_from_file(file):
33
  # read text file
 
79
  continue
80
  file_text += " " + txt
81
  return file_text
82
+
83
  # read image file (OCR)
84
+ elif file.type == "image/jpeg":
85
  return pytesseract.image_to_string(Image.open(file))
86
 
87
  else: