Tuana committed on
Commit
cb95f0e
·
1 Parent(s): 12c1880

revert to only pdfs

Browse files
Files changed (1) hide show
  1. app.py +2 -29
app.py CHANGED
@@ -32,21 +32,9 @@ def pdf_to_document_store(pdf_file):
32
  preprocessed_docs=preprocessor.process(doc)
33
  document_store.write_documents(preprocessed_docs)
34
  temp_file.close()
35
-
36
- def crawl_url(url):
37
- crawler = Crawler(output_dir="crawled_files", overwrite_existing_files=True, crawler_depth=1)
38
- try:
39
- docs = crawler.crawl(urls=[url])
40
- preprocessed_docs = preprocessor.process(docs)
41
- document_store.write_documents(preprocessed_docs)
42
- except:
43
- st.write('We were unable to crawl the contents of that URL, please try something else')
44
 
45
  def summarize(content):
46
- if st.session_state.pdf:
47
- pdf_to_document_store(content)
48
- elif st.session_state.url:
49
- crawl_url(content)
50
  summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
51
  return summaries
52
 
@@ -55,8 +43,6 @@ def set_state_if_absent(key, value):
55
  st.session_state[key] = value
56
 
57
  set_state_if_absent("summaries", None)
58
- set_state_if_absent("url", False)
59
- set_state_if_absent("pdf", False)
60
 
61
  document_store, summarizer, preprocessor = start_haystack()
62
 
@@ -69,24 +55,11 @@ This Summarization demo uses a [Haystack TransformerSummarizer node](https://hay
69
  """, unsafe_allow_html=True)
70
 
71
  uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False)
72
- url = st.text_input(label="enter a URL")
73
-
74
- if (validators.url(url)) and (uploaded_file is None):
75
- if st.button('Summarize contents of URL'):
76
- with st.spinner("πŸ“š    Please wait while we produce a summary..."):
77
- try:
78
- st.session_state.pdf = False
79
- st.session_state.url = True
80
- st. session_state.summaries = summarize(url)
81
- except Exception as e:
82
- logging.exception(e)
83
 
84
- if (uploaded_file is not None) and not validators.url(url):
85
  if st.button('Summarize Document'):
86
  with st.spinner("πŸ“š    Please wait while we produce a summary..."):
87
  try:
88
- st.session_state.pdf = True
89
- st.session_state.url = False
90
  st.session_state.summaries = summarize(uploaded_file)
91
  except Exception as e:
92
  logging.exception(e)
 
32
  preprocessed_docs=preprocessor.process(doc)
33
  document_store.write_documents(preprocessed_docs)
34
  temp_file.close()
 
 
 
 
 
 
 
 
 
35
 
36
  def summarize(content):
37
+ pdf_to_document_store(content)
 
 
 
38
  summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
39
  return summaries
40
 
 
43
  st.session_state[key] = value
44
 
45
  set_state_if_absent("summaries", None)
 
 
46
 
47
  document_store, summarizer, preprocessor = start_haystack()
48
 
 
55
  """, unsafe_allow_html=True)
56
 
57
  uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False)
 
 
 
 
 
 
 
 
 
 
 
58
 
59
+ if uploaded_file is not None :
60
  if st.button('Summarize Document'):
61
  with st.spinner("πŸ“š    Please wait while we produce a summary..."):
62
  try:
 
 
63
  st.session_state.summaries = summarize(uploaded_file)
64
  except Exception as e:
65
  logging.exception(e)