lfoppiano committed on
Commit
0f074cc
·
1 Parent(s): 6915a03

Added a privacy statement, made minor cosmetic changes to the API key information, and spelled out the NER abbreviation

Browse files
Files changed (1) hide show
  1. streamlit_app.py +27 -15
streamlit_app.py CHANGED
@@ -48,11 +48,13 @@ if "messages" not in st.session_state:
48
  if 'ner_processing' not in st.session_state:
49
  st.session_state['ner_processing'] = False
50
 
 
 
51
 
52
  def new_file():
53
  st.session_state['loaded_embeddings'] = None
54
  st.session_state['doc_id'] = None
55
-
56
 
57
  # @st.cache_resource
58
  def init_qa(model):
@@ -128,11 +130,15 @@ def play_old_messages():
128
  else:
129
  st.write(message['content'])
130
 
 
131
  # is_api_key_provided = st.session_state['api_key']
132
 
133
  with st.sidebar:
 
 
 
134
  st.session_state['model'] = model = st.radio(
135
- "Model (cannot be changed after selection or upload)",
136
  ("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1"), # , "llama-2-70b-chat"),
137
  index=1,
138
  captions=[
@@ -140,15 +146,17 @@ with st.sidebar:
140
  "Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings)"
141
  # "LLama2-70B-Chat + Sentence BERT (embeddings)",
142
  ],
143
- help="Select the model you want to use.",
144
- disabled=st.session_state['doc_id'] is not None)
145
 
146
  if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat':
147
- api_key = st.text_input('Huggingface API Key',
148
- type="password") if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ else os.environ[
149
- 'HUGGINGFACEHUB_API_TOKEN']
150
- st.markdown(
151
- "Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
 
 
152
 
153
  if api_key:
154
  # st.session_state['api_key'] = is_api_key_provided = True
@@ -159,10 +167,13 @@ with st.sidebar:
159
  st.session_state['rqa'][model] = init_qa(model)
160
 
161
  elif model == 'chatgpt-3.5-turbo':
162
- api_key = st.text_input('OpenAI API Key', type="password") if 'OPENAI_API_KEY' not in os.environ else \
163
- os.environ['OPENAI_API_KEY']
164
- st.markdown(
165
- "Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
 
 
 
166
  if api_key:
167
  # st.session_state['api_key'] = is_api_key_provided = True
168
  with st.spinner("Preparing environment"):
@@ -177,7 +188,8 @@ st.title("📝 Scientific Document Insight Q&A")
177
  st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
178
 
179
  uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
180
- disabled=st.session_state['model'] is not None and st.session_state['model'] not in st.session_state['api_keys'],
 
181
  help="The full-text is extracted using Grobid. ")
182
 
183
  question = st.chat_input(
@@ -198,7 +210,7 @@ with st.sidebar:
198
  help="Number of chunks to consider when answering a question",
199
  disabled=not uploaded_file)
200
 
201
- st.session_state['ner_processing'] = st.checkbox("NER processing on LLM response")
202
  st.markdown(
203
  '**NER on LLM responses**: The responses from the LLMs are post-processed to extract <span style="color:orange">physical quantities, measurements</span> and <span style="color:green">materials</span> mentions.',
204
  unsafe_allow_html=True)
 
48
  if 'ner_processing' not in st.session_state:
49
  st.session_state['ner_processing'] = False
50
 
51
+ if 'uploaded' not in st.session_state:
52
+ st.session_state['uploaded'] = False
53
 
54
  def new_file():
55
  st.session_state['loaded_embeddings'] = None
56
  st.session_state['doc_id'] = None
57
+ st.session_state['uploaded'] = True
58
 
59
  # @st.cache_resource
60
  def init_qa(model):
 
130
  else:
131
  st.write(message['content'])
132
 
133
+
134
  # is_api_key_provided = st.session_state['api_key']
135
 
136
  with st.sidebar:
137
+ st.markdown(
138
+ ":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
139
+
140
  st.session_state['model'] = model = st.radio(
141
+ "Model",
142
  ("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1"), # , "llama-2-70b-chat"),
143
  index=1,
144
  captions=[
 
146
  "Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings)"
147
  # "LLama2-70B-Chat + Sentence BERT (embeddings)",
148
  ],
149
+ help="Select the LLM model and embeddings you want to use.",
150
+ disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded'])
151
 
152
  if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat':
153
+ if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
154
+ api_key = st.text_input('Huggingface API Key', type="password")
155
+
156
+ st.markdown(
157
+ "Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
158
+ else:
159
+ api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
160
 
161
  if api_key:
162
  # st.session_state['api_key'] = is_api_key_provided = True
 
167
  st.session_state['rqa'][model] = init_qa(model)
168
 
169
  elif model == 'chatgpt-3.5-turbo':
170
+ if 'OPENAI_API_KEY' not in os.environ:
171
+ api_key = st.text_input('OpenAI API Key', type="password")
172
+ st.markdown(
173
+ "Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
174
+ else:
175
+ api_key = os.environ['OPENAI_API_KEY']
176
+
177
  if api_key:
178
  # st.session_state['api_key'] = is_api_key_provided = True
179
  with st.spinner("Preparing environment"):
 
188
  st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
189
 
190
  uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
191
+ disabled=st.session_state['model'] is not None and st.session_state['model'] not in
192
+ st.session_state['api_keys'],
193
  help="The full-text is extracted using Grobid. ")
194
 
195
  question = st.chat_input(
 
210
  help="Number of chunks to consider when answering a question",
211
  disabled=not uploaded_file)
212
 
213
+ st.session_state['ner_processing'] = st.checkbox("Named Entities Recognition (NER) processing on LLM response")
214
  st.markdown(
215
  '**NER on LLM responses**: The responses from the LLMs are post-processed to extract <span style="color:orange">physical quantities, measurements</span> and <span style="color:green">materials</span> mentions.',
216
  unsafe_allow_html=True)