TheoLvs commited on
Commit
5f9881c
·
1 Parent(s): 599c798

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -382
app.py CHANGED
@@ -2,24 +2,35 @@ import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  import os
 
5
  from datetime import datetime
6
 
7
  from utils import create_user_id
8
 
9
  from azure.storage.fileshare import ShareServiceClient
10
 
 
 
 
 
 
11
  # Langchain
12
  from langchain.embeddings import HuggingFaceEmbeddings
13
  from langchain.schema import AIMessage, HumanMessage
14
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
15
 
16
  # ClimateQ&A imports
17
- from climateqa.llm import get_llm
18
- from climateqa.chains import load_qa_chain_with_docs,load_qa_chain_with_text
19
- from climateqa.chains import load_reformulation_chain
20
- from climateqa.vectorstore import get_pinecone_vectorstore
21
- from climateqa.retriever import ClimateQARetriever
22
- from climateqa.prompts import audience_prompts
 
 
 
 
 
23
 
24
  # Load environment variables in local mode
25
  try:
@@ -60,21 +71,7 @@ share_client = service.get_share_client(file_share_name)
60
 
61
  user_id = create_user_id()
62
 
63
- #---------------------------------------------------------------------------
64
- # ClimateQ&A core functions
65
- #---------------------------------------------------------------------------
66
 
67
- from langchain.callbacks.base import BaseCallbackHandler
68
- from queue import Queue, Empty
69
- from threading import Thread
70
- from collections.abc import Generator
71
- from langchain.schema import LLMResult
72
- from typing import Any, Union,Dict,List
73
- from queue import SimpleQueue
74
- # # Create a Queue
75
- # Q = Queue()
76
-
77
- import re
78
 
79
  def parse_output_llm_with_sources(output):
80
  # Split the content into a list of text and "[Doc X]" references
@@ -93,156 +90,110 @@ def parse_output_llm_with_sources(output):
93
 
94
 
95
 
96
- job_done = object() # signals the processing is done
97
-
98
-
99
- class StreamingGradioCallbackHandler(BaseCallbackHandler):
100
- def __init__(self, q: SimpleQueue):
101
- self.q = q
102
-
103
- def on_llm_start(
104
- self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
105
- ) -> None:
106
- """Run when LLM starts running. Clean the queue."""
107
- while not self.q.empty():
108
- try:
109
- self.q.get(block=False)
110
- except Empty:
111
- continue
112
-
113
- def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
114
- """Run on new LLM token. Only available when streaming is enabled."""
115
- self.q.put(token)
116
-
117
- def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
118
- """Run when LLM ends running."""
119
- self.q.put(job_done)
120
-
121
- def on_llm_error(
122
- self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
123
- ) -> None:
124
- """Run when LLM errors."""
125
- self.q.put(job_done)
126
-
127
-
128
-
129
-
130
  # Create embeddings function and LLM
131
- embeddings_function = HuggingFaceEmbeddings(model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1")
132
-
133
 
134
  # Create vectorstore and retriever
135
  vectorstore = get_pinecone_vectorstore(embeddings_function)
136
 
137
- #---------------------------------------------------------------------------
138
- # ClimateQ&A Streaming
139
- # From https://github.com/gradio-app/gradio/issues/5345
140
- # And https://stackoverflow.com/questions/76057076/how-to-stream-agents-response-in-langchain
141
- #---------------------------------------------------------------------------
142
 
143
- from threading import Thread
 
 
144
 
145
- import json
146
 
147
- def answer_user(query,query_example,history):
148
- if len(query) <= 2:
149
- raise Exception("Please ask a longer question")
150
- return query, history + [[query, ". . ."]]
 
 
 
 
 
151
 
152
- def answer_user_example(query,query_example,history):
153
- return query_example, history + [[query_example, ". . ."]]
 
154
 
155
- def fetch_sources(query,sources):
 
 
 
 
 
 
 
156
 
157
  # Prepare default values
158
  if len(sources) == 0:
159
  sources = ["IPCC"]
160
 
161
- llm_reformulation = get_llm(max_tokens = 512,temperature = 0.0,verbose = True,streaming = False)
162
- retriever = ClimateQARetriever(vectorstore=vectorstore,sources = sources,k_summary = 3,k_total = 10)
163
- reformulation_chain = load_reformulation_chain(llm_reformulation)
164
 
165
- # Calculate language
166
- output_reformulation = reformulation_chain({"query":query})
167
- question = output_reformulation["question"]
168
- language = output_reformulation["language"]
169
 
170
- # Retrieve docs
171
- docs = retriever.get_relevant_documents(question)
172
 
173
- if len(docs) > 0:
174
-
175
- # Already display the sources
176
- sources_text = []
177
- for i, d in enumerate(docs, 1):
178
- sources_text.append(make_html_source(d, i))
179
- citations_text = "".join(sources_text)
180
- docs_text = "\n\n".join([d.page_content for d in docs])
181
- return "",citations_text,docs_text,question,language
182
- else:
183
- sources_text = "⚠️ No relevant passages found in the scientific reports (IPCC and IPBES)"
184
- citations_text = "**⚠️ No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate and biodiversity issues).**"
185
- docs_text = ""
186
- return "",citations_text,docs_text,question,language
187
 
 
188
 
189
- def answer_bot(query,history,docs,question,language,audience):
190
 
191
- if audience == "Children":
192
- audience_prompt = audience_prompts["children"]
193
- elif audience == "General public":
194
- audience_prompt = audience_prompts["general"]
195
- elif audience == "Experts":
196
- audience_prompt = audience_prompts["experts"]
197
- else:
198
- audience_prompt = audience_prompts["experts"]
199
 
200
- # Prepare Queue for streaming LLMs
201
- Q = SimpleQueue()
202
 
203
- llm_streaming = get_llm(max_tokens = 1024,temperature = 0.0,verbose = True,streaming = True,
204
- callbacks=[StreamingGradioCallbackHandler(Q),StreamingStdOutCallbackHandler()],
205
- )
 
 
 
 
206
 
207
- qa_chain = load_qa_chain_with_text(llm_streaming)
 
 
208
 
209
- def threaded_chain(question,audience,language,docs):
210
- try:
211
- response = qa_chain({"question":question,"audience":audience,"language":language,"summaries":docs})
212
- Q.put(response)
213
- Q.put(job_done)
214
- except Exception as e:
215
- print(e)
216
 
217
- history[-1][1] = ""
218
-
219
- textbox=gr.Textbox(placeholder=". . .",show_label=False,scale=1,lines = 1,interactive = False)
220
 
 
221
 
222
- if len(docs) > 0:
 
 
 
 
 
 
 
 
 
223
 
224
- # Start thread for streaming
225
- thread = Thread(
226
- target=threaded_chain,
227
- kwargs={"question":question,"audience":audience_prompt,"language":language,"docs":docs}
228
- )
229
- thread.start()
230
-
231
- while True:
232
- next_item = Q.get(block=True) # Blocks until an input is available
233
-
234
- if next_item is job_done:
235
- break
236
- elif isinstance(next_item, str):
237
- new_paragraph = history[-1][1] + next_item
238
- new_paragraph = parse_output_llm_with_sources(new_paragraph)
239
- history[-1][1] = new_paragraph
240
- yield textbox,history
241
- else:
242
- pass
243
- thread.join()
244
-
245
- # Log answer on Azure Blob Storage
246
  timestamp = str(datetime.now().timestamp())
247
  file = timestamp + ".json"
248
  prompt = history[-1][0]
@@ -250,75 +201,31 @@ def answer_bot(query,history,docs,question,language,audience):
250
  "user_id": str(user_id),
251
  "prompt": prompt,
252
  "query": prompt,
253
- "question":question,
254
- "docs":docs,
255
  "answer": history[-1][1],
256
  "time": timestamp,
257
  }
258
  log_on_azure(file, logs, share_client)
259
 
260
 
 
 
 
 
 
 
261
 
262
- else:
263
- complete_response = "**⚠️ No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate and biodiversity issues).**"
264
- history[-1][1] += complete_response
265
- yield "",history
266
-
267
-
268
-
269
- # history_langchain_format = []
270
- # for human, ai in history:
271
- # history_langchain_format.append(HumanMessage(content=human))
272
- # history_langchain_format.append(AIMessage(content=ai))
273
- # history_langchain_format.append(HumanMessage(content=message)
274
- # for next_token, content in stream(message):
275
- # yield(content)
276
-
277
- # thread = Thread(target=threaded_chain, kwargs={"query":message,"audience":audience_prompt})
278
- # thread.start()
279
-
280
- # history[-1][1] = ""
281
- # while True:
282
- # next_item = Q.get(block=True) # Blocks until an input is available
283
-
284
- # print(type(next_item))
285
- # if next_item is job_done:
286
- # continue
287
-
288
- # elif isinstance(next_item, dict): # assuming LLMResult is a dictionary
289
- # response = next_item
290
- # if "source_documents" in response and len(response["source_documents"]) > 0:
291
- # sources_text = []
292
- # for i, d in enumerate(response["source_documents"], 1):
293
- # sources_text.append(make_html_source(d, i))
294
- # sources_text = "\n\n".join([f"Query used for retrieval:\n{response['question']}"] + sources_text)
295
- # # history[-1][1] += next_item["answer"]
296
- # # history[-1][1] += "\n\n" + sources_text
297
- # yield "", history, sources_text
298
-
299
- # else:
300
- # sources_text = "⚠️ No relevant passages found in the scientific reports (IPCC and IPBES)"
301
- # complete_response = "**⚠️ No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate and biodiversity issues).**"
302
- # history[-1][1] += "\n\n" + complete_response
303
- # yield "", history, sources_text
304
- # break
305
-
306
- # elif isinstance(next_item, str):
307
- # new_paragraph = history[-1][1] + next_item
308
- # new_paragraph = parse_output_llm_with_sources(new_paragraph)
309
- # history[-1][1] = new_paragraph
310
- # yield "", history, ""
311
-
312
- # thread.join()
313
-
314
- #---------------------------------------------------------------------------
315
- # ClimateQ&A core functions
316
- #---------------------------------------------------------------------------
317
 
318
 
319
  def make_html_source(source,i):
320
  meta = source.metadata
321
- content = source.page_content.split(":",1)[1].strip()
 
322
  return f"""
323
  <div class="card">
324
  <div class="card-content">
@@ -336,103 +243,10 @@ def make_html_source(source,i):
336
 
337
 
338
 
339
- # def chat(
340
- # user_id: str,
341
- # query: str,
342
- # history: list = [system_template],
343
- # report_type: str = "IPCC",
344
- # threshold: float = 0.555,
345
- # ) -> tuple:
346
- # """retrieve relevant documents in the document store then query gpt-turbo
347
-
348
- # Args:
349
- # query (str): user message.
350
- # history (list, optional): history of the conversation. Defaults to [system_template].
351
- # report_type (str, optional): should be "All available" or "IPCC only". Defaults to "All available".
352
- # threshold (float, optional): similarity threshold, don't increase more than 0.568. Defaults to 0.56.
353
-
354
- # Yields:
355
- # tuple: chat gradio format, chat openai format, sources used.
356
- # """
357
-
358
- # if report_type not in ["IPCC","IPBES"]: report_type = "all"
359
- # print("Searching in ",report_type," reports")
360
- # # if report_type == "All available":
361
- # # retriever = retrieve_all
362
- # # elif report_type == "IPCC only":
363
- # # retriever = retrieve_giec
364
- # # else:
365
- # # raise Exception("report_type arg should be in (All available, IPCC only)")
366
-
367
- # reformulated_query = openai.Completion.create(
368
- # engine="EkiGPT",
369
- # prompt=get_reformulation_prompt(query),
370
- # temperature=0,
371
- # max_tokens=128,
372
- # stop=["\n---\n", "<|im_end|>"],
373
- # )
374
- # reformulated_query = reformulated_query["choices"][0]["text"]
375
- # reformulated_query, language = reformulated_query.split("\n")
376
- # language = language.split(":")[1].strip()
377
-
378
-
379
- # sources = retrieve_with_summaries(reformulated_query,retriever,k_total = 10,k_summary = 3,as_dict = True,source = report_type.lower(),threshold = threshold)
380
- # response_retriever = {
381
- # "language":language,
382
- # "reformulated_query":reformulated_query,
383
- # "query":query,
384
- # "sources":sources,
385
- # }
386
-
387
- # # docs = [d for d in retriever.retrieve(query=reformulated_query, top_k=10) if d.score > threshold]
388
- # messages = history + [{"role": "user", "content": query}]
389
-
390
- # if len(sources) > 0:
391
- # docs_string = []
392
- # docs_html = []
393
- # for i, d in enumerate(sources, 1):
394
- # docs_string.append(f"📃 Doc {i}: {d['meta']['short_name']} page {d['meta']['page_number']}\n{d['content']}")
395
- # docs_html.append(make_html_source(d,i))
396
- # docs_string = "\n\n".join([f"Query used for retrieval:\n{reformulated_query}"] + docs_string)
397
- # docs_html = "\n\n".join([f"Query used for retrieval:\n{reformulated_query}"] + docs_html)
398
- # messages.append({"role": "system", "content": f"{sources_prompt}\n\n{docs_string}\n\nAnswer in {language}:"})
399
-
400
-
401
- # response = openai.Completion.create(
402
- # engine="EkiGPT",
403
- # prompt=to_completion(messages),
404
- # temperature=0, # deterministic
405
- # stream=True,
406
- # max_tokens=1024,
407
- # )
408
-
409
- # complete_response = ""
410
- # messages.pop()
411
-
412
- # messages.append({"role": "assistant", "content": complete_response})
413
- # timestamp = str(datetime.now().timestamp())
414
- # file = user_id + timestamp + ".json"
415
- # logs = {
416
- # "user_id": user_id,
417
- # "prompt": query,
418
- # "retrived": sources,
419
- # "report_type": report_type,
420
- # "prompt_eng": messages[0],
421
- # "answer": messages[-1]["content"],
422
- # "time": timestamp,
423
- # }
424
- # log_on_azure(file, logs, share_client)
425
-
426
- # for chunk in response:
427
- # if (chunk_message := chunk["choices"][0].get("text")) and chunk_message != "<|im_end|>":
428
- # complete_response += chunk_message
429
- # messages[-1]["content"] = complete_response
430
- # gradio_format = make_pairs([a["content"] for a in messages[1:]])
431
- # yield gradio_format, messages, docs_html
432
 
433
  # else:
434
- # docs_string = "⚠️ No relevant passages found in the climate science reports (IPCC and IPBES)"
435
- # complete_response = "**⚠️ No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate issues).**"
436
  # messages.append({"role": "assistant", "content": complete_response})
437
  # gradio_format = make_pairs([a["content"] for a in messages[1:]])
438
  # yield gradio_format, messages, docs_string
@@ -451,14 +265,10 @@ def save_feedback(feed: str, user_id):
451
  return "Feedback submitted, thank you!"
452
 
453
 
454
- def reset_textbox():
455
- return gr.update(value="")
456
 
457
- import json
458
 
459
  def log_on_azure(file, logs, share_client):
460
  logs = json.dumps(logs)
461
- print(type(logs))
462
  file_client = share_client.get_file_client(file)
463
  print("Uploading logs to Azure Blob Storage")
464
  print("----------------------------------")
@@ -468,12 +278,6 @@ def log_on_azure(file, logs, share_client):
468
  print("Logs uploaded to Azure Blob Storage")
469
 
470
 
471
- # def disable_component():
472
- # return gr.update(interactive = False)
473
-
474
-
475
-
476
-
477
  # --------------------------------------------------------------------
478
  # Gradio
479
  # --------------------------------------------------------------------
@@ -482,15 +286,15 @@ def log_on_azure(file, logs, share_client):
482
  init_prompt = """
483
  Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
484
 
485
- 💡 How to use
486
  - **Language**: You can ask me your questions in any language.
487
  - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
488
  - **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
489
 
490
- ⚠️ Limitations
491
  *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
492
 
493
- What do you want to learn ?
494
  """
495
 
496
 
@@ -501,21 +305,20 @@ def vote(data: gr.LikeData):
501
  print(data)
502
 
503
 
504
- def change_tab():
505
- return gr.Tabs.update(selected=1)
506
-
507
 
508
- with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
509
  # user_id_state = gr.State([user_id])
510
 
511
- with gr.Tab("🌍 ClimateQ&A"):
512
 
513
  with gr.Row(elem_id="chatbot-row"):
514
  with gr.Column(scale=2):
515
  # state = gr.State([system_template])
516
- bot = gr.Chatbot(
517
- value=[[None,init_prompt]],
518
- show_copy_button=True,show_label = False,elem_id="chatbot",layout = "panel",avatar_images = ("assets/logo4.png",None))
 
 
519
 
520
  # bot.like(vote,None,None)
521
 
@@ -523,74 +326,62 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
523
 
524
  with gr.Row(elem_id = "input-message"):
525
  textbox=gr.Textbox(placeholder="Ask me anything here!",show_label=False,scale=1,lines = 1,interactive = True)
526
- # submit_button = gr.Button(">",scale = 1,elem_id = "submit-button")
527
 
528
 
529
  with gr.Column(scale=1, variant="panel",elem_id = "right-panel"):
530
 
531
 
532
  with gr.Tabs() as tabs:
533
- with gr.TabItem("📝 Examples",elem_id = "tab-examples",id = 0):
534
 
535
- examples_hidden = gr.Textbox(elem_id="hidden-message")
536
-
537
- examples_questions = gr.Examples(
538
- [
539
- "Is climate change caused by humans?",
540
- "What evidence do we have of climate change?",
541
- "What are the impacts of climate change?",
542
- "Can climate change be reversed?",
543
- "What is the difference between climate change and global warming?",
544
- "What can individuals do to address climate change?",
545
- "What are the main causes of climate change?",
546
- "What is the Paris Agreement and why is it important?",
547
- "Which industries have the highest GHG emissions?",
548
- "Is climate change a hoax created by the government or environmental organizations?",
549
- "What is the relationship between climate change and biodiversity loss?",
550
- "What is the link between gender equality and climate change?",
551
- "Is the impact of climate change really as severe as it is claimed to be?",
552
- "What is the impact of rising sea levels?",
553
- "What are the different greenhouse gases (GHG)?",
554
- "What is the warming power of methane?",
555
- "What is the jet stream?",
556
- "What is the breakdown of carbon sinks?",
557
- "How do the GHGs work ? Why does temperature increase ?",
558
- "What is the impact of global warming on ocean currents?",
559
- "How much warming is possible in 2050?",
560
- "What is the impact of climate change in Africa?",
561
- "Will climate change accelerate diseases and epidemics like COVID?",
562
- "What are the economic impacts of climate change?",
563
- "How much is the cost of inaction ?",
564
- "What is the relationship between climate change and poverty?",
565
- "What are the most effective strategies and technologies for reducing greenhouse gas (GHG) emissions?",
566
- "Is economic growth possible? What do you think about degrowth?",
567
- "Will technology save us?",
568
- "Is climate change a natural phenomenon ?",
569
- "Is climate change really happening or is it just a natural fluctuation in Earth's temperature?",
570
- "Is the scientific consensus on climate change really as strong as it is claimed to be?",
571
- ],
572
- [examples_hidden],
573
- examples_per_page=10,
574
- run_on_click=False,
575
- # cache_examples=True,
576
- )
577
 
578
- with gr.Tab("📚 Citations",elem_id = "tab-citations",id = 1):
 
579
  sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
580
  docs_textbox = gr.State("")
581
 
582
- with gr.Tab("⚙️ Configuration",elem_id = "tab-config",id = 2):
583
 
584
  gr.Markdown("Reminder: You can talk in any language, ClimateQ&A is multi-lingual!")
585
 
586
 
587
  dropdown_sources = gr.CheckboxGroup(
588
  ["IPCC", "IPBES"],
589
- label="Select reports",
590
  value=["IPCC"],
591
  interactive=True,
592
  )
593
 
 
 
 
 
 
 
 
 
594
  dropdown_audience = gr.Dropdown(
595
  ["Children","General public","Experts"],
596
  label="Select audience",
@@ -601,24 +392,56 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
601
  output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False)
602
  output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
603
 
 
 
 
604
 
 
 
 
 
 
 
605
 
606
- # textbox.submit(predict_climateqa,[textbox,bot],[None,bot,sources_textbox])
607
  (textbox
608
- .submit(answer_user, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
609
- .success(change_tab,None,tabs)
610
- .success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
611
- .success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue = True)
612
- .success(lambda x : textbox,[textbox],[textbox])
613
  )
614
 
615
  (examples_hidden
616
- .change(answer_user_example, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
617
- .success(change_tab,None,tabs)
618
- .success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
619
- .success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue=True)
620
- .success(lambda x : textbox,[textbox],[textbox])
621
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
  # submit_button.click(answer_user, [textbox, bot], [textbox, bot], queue=True).then(
623
  # answer_bot, [textbox,bot,dropdown_audience,dropdown_sources], [textbox,bot,sources_textbox]
624
  # )
@@ -641,7 +464,7 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
641
  #---------------------------------------------------------------------------------------
642
 
643
 
644
- with gr.Tab("ℹ️ About ClimateQ&A",elem_classes = "max-height"):
645
  with gr.Row():
646
  with gr.Column(scale=1):
647
  gr.Markdown(
@@ -667,7 +490,7 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
667
  with gr.Column(scale=1):
668
  gr.Markdown(
669
  """
670
- ### 💪 Getting started
671
  - In the chatbot section, simply type your climate-related question, and ClimateQ&A will provide an answer with references to relevant IPCC reports.
672
  - ClimateQ&A retrieves specific passages from the IPCC reports to help answer your question accurately.
673
  - Source information, including page numbers and passages, is displayed on the right side of the screen for easy verification.
@@ -679,7 +502,7 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
679
  with gr.Column(scale=1):
680
  gr.Markdown(
681
  """
682
- ### ⚠️ Limitations
683
  <div class="warning-box">
684
  <ul>
685
  <li>Please note that, like any AI, the model may occasionally generate an inaccurate or imprecise answer. Always refer to the provided sources to verify the validity of the information given. If you find any issues with the response, kindly provide feedback to help improve the system.</li>
@@ -689,11 +512,11 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
689
  )
690
 
691
 
692
- with gr.Tab("📧 Contact, feedback and feature requests"):
693
  gr.Markdown(
694
  """
695
 
696
- 🤞 For any question or press request, contact Théo Alves Da Costa at <b>[email protected]</b>
697
 
698
  - ClimateQ&A welcomes community contributions. To participate, head over to the Community Tab and create a "New Discussion" to ask questions and share your insights.
699
  - Provide feedback through email, letting us know which insights you found accurate, useful, or not. Your input will help us improve the platform.
@@ -731,7 +554,7 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
731
  # openai_api_key_textbox.change(set_openai_api_key, inputs=[openai_api_key_textbox])
732
  # openai_api_key_textbox.submit(set_openai_api_key, inputs=[openai_api_key_textbox])
733
 
734
- with gr.Tab("📚 Sources",elem_classes = "max-height"):
735
  gr.Markdown("""
736
  | Source | Report | URL | Number of pages | Release date |
737
  | --- | --- | --- | --- | --- |
@@ -772,7 +595,7 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
772
  IPBES | Summary for Policymakers. Assessment Report on Land Degradation and Restoration. | https://zenodo.org/record/3237393/files/ipbes_assessment_report_ldra_EN.pdf | 48 | 2018
773
  """)
774
 
775
- with gr.Tab("🛢️ Carbon Footprint"):
776
  gr.Markdown("""
777
 
778
  Carbon emissions were measured during the development and inference process using CodeCarbon [https://github.com/mlco2/codecarbon](https://github.com/mlco2/codecarbon)
@@ -789,8 +612,20 @@ Or around 2 to 4 times more than a typical Google search.
789
  """
790
  )
791
 
792
- with gr.Tab("🪄 Changelog"):
793
  gr.Markdown("""
 
 
 
 
 
 
 
 
 
 
 
 
794
 
795
  ##### v1.1.0 - *2023-10-16*
796
  - ClimateQ&A on Hugging Face is finally working again with all the new features !
@@ -807,6 +642,6 @@ Or around 2 to 4 times more than a typical Google search.
807
  """
808
  )
809
 
810
- demo.queue(concurrency_count=16)
811
 
812
  demo.launch()
 
2
  import pandas as pd
3
  import numpy as np
4
  import os
5
+ import time
6
  from datetime import datetime
7
 
8
  from utils import create_user_id
9
 
10
  from azure.storage.fileshare import ShareServiceClient
11
 
12
+
13
+ import re
14
+ import json
15
+
16
+
17
  # Langchain
18
  from langchain.embeddings import HuggingFaceEmbeddings
19
  from langchain.schema import AIMessage, HumanMessage
20
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
21
 
22
  # ClimateQ&A imports
23
+ from climateqa.engine.llm import get_llm
24
+ # from climateqa.chains import load_qa_chain_with_docs,load_qa_chain_with_text
25
+ # from climateqa.chains import load_reformulation_chain
26
+ from climateqa.engine.rag import make_rag_chain
27
+ from climateqa.engine.vectorstore import get_pinecone_vectorstore
28
+ from climateqa.engine.retriever import ClimateQARetriever
29
+ from climateqa.engine.embeddings import get_embeddings_function
30
+ from climateqa.engine.prompts import audience_prompts
31
+ from climateqa.sample_questions import QUESTIONS
32
+ from climateqa.constants import POSSIBLE_REPORTS
33
+ from climateqa.utils import get_image_from_azure_blob_storage
34
 
35
  # Load environment variables in local mode
36
  try:
 
71
 
72
  user_id = create_user_id()
73
 
 
 
 
74
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  def parse_output_llm_with_sources(output):
77
  # Split the content into a list of text and "[Doc X]" references
 
90
 
91
 
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # Create embeddings function and LLM
94
+ embeddings_function = get_embeddings_function()
 
95
 
96
  # Create vectorstore and retriever
97
  vectorstore = get_pinecone_vectorstore(embeddings_function)
98
 
 
 
 
 
 
99
 
100
def make_pairs(lst):
    """Group consecutive items of ``lst`` two by two.

    ``lst`` is expected to have an even length; items at positions
    (0, 1), (2, 3), ... are returned as a list of 2-tuples. An odd-length
    input raises ``IndexError`` when the last, unpaired item is reached.
    """
    pairs = []
    for start in range(0, len(lst), 2):
        # Pair the item at `start` with its immediate successor.
        pairs.append((lst[start], lst[start + 1]))
    return pairs
103
 
 
104
 
105
def serialize_docs(docs):
    """Convert document objects into plain dictionaries.

    Each item of ``docs`` must expose ``page_content`` and ``metadata``
    attributes. The result is a list of
    ``{"page_content": ..., "metadata": ...}`` dicts in the same order;
    the metadata object is referenced as-is, not copied.
    """
    return [
        {"page_content": doc.page_content, "metadata": doc.metadata}
        for doc in docs
    ]
113
+
114
 
115
async def chat(query,history,audience,sources,reports):
    """Stream a RAG answer for ``query`` and yield UI updates while it is produced.

    The chain built by ``make_rag_chain`` is consumed through ``astream_log``.
    The first operation of every log patch is matched on its path to pick up
    the reformulated question, the retrieved documents and the answer tokens.

    Args:
        query: The user's question.
        history: Chat history as a list of (user, bot) pairs. The last pair is
            overwritten with the answer streamed so far, so the caller is
            presumably expected to have appended a ``(query, "")`` entry
            beforehand -- TODO confirm against the Gradio wiring.
        audience: Audience label ("Children", "General public", "Experts");
            any other value falls back to the experts prompt.
        sources: Selected sources; ``["IPCC"]`` is used when empty.
        reports: Selected reports; an empty selection is passed on as ``[]``.

    Yields:
        Tuples ``(history, docs_html, output_query, output_language, gallery)``:
        the history as a list of tuples, the HTML of the retrieved sources,
        the reformulated question, the detected language, and the gallery
        (empty while streaming, filled in the final yield).
    """
    # Local import: only needed for the non-blocking pause in the stream loop.
    import asyncio

    if audience == "Children":
        audience_prompt = audience_prompts["children"]
    elif audience == "General public":
        audience_prompt = audience_prompts["general"]
    else:
        # "Experts" and any unknown value share the experts prompt.
        audience_prompt = audience_prompts["experts"]

    # Prepare default values (also tolerates None coming from the UI)
    if not sources:
        sources = ["IPCC"]
    if not reports:
        reports = []

    llm = get_llm(max_tokens = 1024,temperature = 0.0)
    retriever = ClimateQARetriever(vectorstore=vectorstore,sources = sources,reports = reports,k_summary = 3,k_total = 10,threshold=0.4)
    rag_chain = make_rag_chain(retriever,llm)

    inputs = {"query": query,"audience": audience_prompt}
    result = rag_chain.astream_log(inputs)

    # Paths of the log entries we care about in the streamed run log.
    reformulated_question_path_id = "/logs/flatten_dict/final_output"
    retriever_path_id = "/logs/Retriever/final_output"
    final_answer_path_id = "/logs/AzureChatOpenAI:2/streamed_output_str/-"

    docs = []  # stays empty if the retriever step never reports documents
    docs_html = ""
    output_query = ""
    output_language = ""
    gallery = []

    async for patch in result:

        # Only the first operation of each patch is inspected.
        op = patch.ops[0]

        if op['path'] == reformulated_question_path_id: # reformulated question
            output_language = op['value']["language"] # str
            output_query = op["value"]["question"]

        elif op['path'] == retriever_path_id: # documents
            docs = op['value']['documents'] # List[Document]
            docs_html = "".join(make_html_source(d, i) for i, d in enumerate(docs, 1))

        elif op['path'] == final_answer_path_id: # final answer
            new_token = op['value'] # str
            # Small pause to smooth the streaming; awaiting (instead of
            # time.sleep) keeps the event loop free for other requests.
            await asyncio.sleep(0.03)
            answer_yet = history[-1][1] + new_token
            answer_yet = parse_output_llm_with_sources(answer_yet)
            history[-1] = (query,answer_yet)

        history = [tuple(x) for x in history]
        yield history,docs_html,output_query,output_language,gallery

    # Log answer on Azure Blob Storage
    if os.getenv("GRADIO_ENV") != "local":
        timestamp = str(datetime.now().timestamp())
        file = timestamp + ".json"
        prompt = history[-1][0]
        logs = {
            "user_id": str(user_id),
            "prompt": prompt,
            "query": prompt,
            "question":output_query,
            "docs":serialize_docs(docs),
            "answer": history[-1][1],
            "time": timestamp,
        }
        log_on_azure(file, logs, share_client)

    # Keep only image paths mentioning "IAS"; a missing or empty
    # "image_path" entry is skipped instead of raising.
    image_paths = [
        path
        for path in ((d.metadata.get("image_path") or "") for d in docs)
        if "IAS" in path
    ]
    if image_paths:
        # One metadata entry may hold several "|"-separated paths; dedupe
        # while preserving order so the gallery is deterministic.
        unique_paths = list(dict.fromkeys("|".join(image_paths).split("|")))
        gallery = [get_image_from_azure_blob_storage(x) for x in unique_paths]
    else:
        gallery = []

    yield history,docs_html,output_query,output_language,gallery
218
 
219
+
220
+ # memory.save_context(inputs, {"answer": gradio_format[-1][1]})
221
+ # yield gradio_format, memory.load_memory_variables({})["history"], source_string
222
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
 
225
  def make_html_source(source,i):
226
  meta = source.metadata
227
+ # content = source.page_content.split(":",1)[1].strip()
228
+ content = source.page_content.strip()
229
  return f"""
230
  <div class="card">
231
  <div class="card-content">
 
243
 
244
 
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  # else:
248
+ # docs_string = "No relevant passages found in the climate science reports (IPCC and IPBES)"
249
+ # complete_response = "**No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate issues).**"
250
  # messages.append({"role": "assistant", "content": complete_response})
251
  # gradio_format = make_pairs([a["content"] for a in messages[1:]])
252
  # yield gradio_format, messages, docs_string
 
265
  return "Feedback submitted, thank you!"
266
 
267
 
 
 
268
 
 
269
 
270
  def log_on_azure(file, logs, share_client):
271
  logs = json.dumps(logs)
 
272
  file_client = share_client.get_file_client(file)
273
  print("Uploading logs to Azure Blob Storage")
274
  print("----------------------------------")
 
278
  print("Logs uploaded to Azure Blob Storage")
279
 
280
 
 
 
 
 
 
 
281
  # --------------------------------------------------------------------
282
  # Gradio
283
  # --------------------------------------------------------------------
 
286
  init_prompt = """
287
  Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
288
 
289
+ How to use
290
  - **Language**: You can ask me your questions in any language.
291
  - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
292
  - **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
293
 
294
+ Limitations
295
  *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
296
 
297
+ What do you want to learn ?
298
  """
299
 
300
 
 
305
  print(data)
306
 
307
 
 
 
 
308
 
309
+ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main-component") as demo:
310
  # user_id_state = gr.State([user_id])
311
 
312
+ with gr.Tab("ClimateQ&A"):
313
 
314
  with gr.Row(elem_id="chatbot-row"):
315
  with gr.Column(scale=2):
316
  # state = gr.State([system_template])
317
+ chatbot = gr.Chatbot(
318
+ value=[(None,init_prompt)],
319
+ show_copy_button=True,show_label = False,elem_id="chatbot",layout = "panel",
320
+ avatar_images = ("https://i.ibb.co/YNyd5W2/logo4.png",None),
321
+ )#,avatar_images = ("assets/logo4.png",None))
322
 
323
  # bot.like(vote,None,None)
324
 
 
326
 
327
  with gr.Row(elem_id = "input-message"):
328
  textbox=gr.Textbox(placeholder="Ask me anything here!",show_label=False,scale=1,lines = 1,interactive = True)
 
329
 
330
 
331
  with gr.Column(scale=1, variant="panel",elem_id = "right-panel"):
332
 
333
 
334
  with gr.Tabs() as tabs:
335
+ with gr.TabItem("Examples",elem_id = "tab-examples",id = 0):
336
 
337
+ examples_hidden = gr.Textbox(visible = False)
338
+ first_key = list(QUESTIONS.keys())[0]
339
+ dropdown_samples = gr.Dropdown(QUESTIONS.keys(),value = first_key,interactive = True,show_label = True,label = "Select a category of sample questions",elem_id = "dropdown-samples")
340
+
341
+ samples = []
342
+ for i,key in enumerate(QUESTIONS.keys()):
343
+
344
+ examples_visible = True if i == 0 else False
345
+
346
+ with gr.Row(visible = examples_visible) as group_examples:
347
+
348
+ examples_questions = gr.Examples(
349
+ QUESTIONS[key],
350
+ [examples_hidden],
351
+ examples_per_page=8,
352
+ run_on_click=False,
353
+ elem_id=f"examples{i}",
354
+ # label = "Click on the example question or enter your own",
355
+ # cache_examples=True,
356
+ )
357
+
358
+ samples.append(group_examples)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
+
361
+ with gr.Tab("Citations",elem_id = "tab-citations",id = 1):
362
  sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
363
  docs_textbox = gr.State("")
364
 
365
+ with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
366
 
367
  gr.Markdown("Reminder: You can talk in any language, ClimateQ&A is multi-lingual!")
368
 
369
 
370
  dropdown_sources = gr.CheckboxGroup(
371
  ["IPCC", "IPBES"],
372
+ label="Select source",
373
  value=["IPCC"],
374
  interactive=True,
375
  )
376
 
377
+ dropdown_reports = gr.Dropdown(
378
+ POSSIBLE_REPORTS,
379
+ label="Or select specific reports",
380
+ multiselect=True,
381
+ value=None,
382
+ interactive=True,
383
+ )
384
+
385
  dropdown_audience = gr.Dropdown(
386
  ["Children","General public","Experts"],
387
  label="Select audience",
 
392
  output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False)
393
  output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
394
 
395
+ with gr.Tab("Images",elem_id = "tab-images",id = 3):
396
+ gallery = gr.Gallery()
397
+
398
 
399
def start_chat(query, history):
    """Prepare the UI for a new question before the answer is streamed.

    Appends ``(query, "")`` to the chat history so the answer slot exists,
    and returns updates for the three outputs this handler is wired to
    (textbox, tabs, chatbot), in that order.
    """
    # Empty string is the placeholder the streamed answer gets appended to.
    pending_turn = (query, "")
    extended_history = history + [pending_turn]

    # Lock the input while the answer is being generated.
    textbox_update = gr.update(interactive = False)
    # Select the tab whose id is 1 (the "Citations" tab in this layout).
    tabs_update = gr.update(selected=1)

    return (textbox_update, tabs_update, extended_history)
402
+
403
def finish_chat():
    """Return the update that re-enables the input textbox and clears it."""
    textbox_update = gr.update(interactive = True,value = "")
    return textbox_update
405
 
 
406
  (textbox
407
+ .submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False)
408
+ .success(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery])
409
+ .success(finish_chat, None, [textbox])
 
 
410
  )
411
 
412
  (examples_hidden
413
+ .change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False)
414
+ .success(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery])
415
+ .success(finish_chat, None, [textbox])
 
 
416
  )
417
+
418
+
419
def change_sample_questions(key):
    """Show only the example group that matches the selected category.

    ``key`` is looked up among the keys of ``QUESTIONS``; its position
    selects which entry of ``samples`` stays visible. Returns one
    ``gr.update`` per entry of ``samples``, in order.

    Raises ``ValueError`` if ``key`` is not a key of ``QUESTIONS``.
    """
    selected_position = list(QUESTIONS).index(key)
    # One visibility update per example group; only the selected one is shown.
    return [
        gr.update(visible=(position == selected_position))
        for position, _ in enumerate(samples)
    ]
424
+
425
+
426
+
427
+ dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
428
+
429
+ # # textbox.submit(predict_climateqa,[textbox,bot],[None,bot,sources_textbox])
430
+ # (textbox
431
+ # .submit(answer_user, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
432
+ # .success(change_tab,None,tabs)
433
+ # .success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
434
+ # .success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue = True)
435
+ # .success(lambda x : textbox,[textbox],[textbox])
436
+ # )
437
+
438
+ # (examples_hidden
439
+ # .change(answer_user_example, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
440
+ # .success(change_tab,None,tabs)
441
+ # .success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
442
+ # .success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue=True)
443
+ # .success(lambda x : textbox,[textbox],[textbox])
444
+ # )
445
  # submit_button.click(answer_user, [textbox, bot], [textbox, bot], queue=True).then(
446
  # answer_bot, [textbox,bot,dropdown_audience,dropdown_sources], [textbox,bot,sources_textbox]
447
  # )
 
464
  #---------------------------------------------------------------------------------------
465
 
466
 
467
+ with gr.Tab("About ClimateQ&A",elem_classes = "max-height other-tabs"):
468
  with gr.Row():
469
  with gr.Column(scale=1):
470
  gr.Markdown(
 
490
  with gr.Column(scale=1):
491
  gr.Markdown(
492
  """
493
+ ### Getting started
494
  - In the chatbot section, simply type your climate-related question, and ClimateQ&A will provide an answer with references to relevant IPCC reports.
495
  - ClimateQ&A retrieves specific passages from the IPCC reports to help answer your question accurately.
496
  - Source information, including page numbers and passages, is displayed on the right side of the screen for easy verification.
 
502
  with gr.Column(scale=1):
503
  gr.Markdown(
504
  """
505
+ ### Limitations
506
  <div class="warning-box">
507
  <ul>
508
  <li>Please note that, like any AI, the model may occasionally generate an inaccurate or imprecise answer. Always refer to the provided sources to verify the validity of the information given. If you find any issues with the response, kindly provide feedback to help improve the system.</li>
 
512
  )
513
 
514
 
515
+ with gr.Tab("Contact, feedback and feature requests",elem_classes = "max-height other-tabs"):
516
  gr.Markdown(
517
  """
518
 
519
+ For any question or press request, contact Théo Alves Da Costa at <b>[email protected]</b>
520
 
521
  - ClimateQ&A welcomes community contributions. To participate, head over to the Community Tab and create a "New Discussion" to ask questions and share your insights.
522
  - Provide feedback through email, letting us know which insights you found accurate, useful, or not. Your input will help us improve the platform.
 
554
  # openai_api_key_textbox.change(set_openai_api_key, inputs=[openai_api_key_textbox])
555
  # openai_api_key_textbox.submit(set_openai_api_key, inputs=[openai_api_key_textbox])
556
 
557
+ with gr.Tab("Sources",elem_classes = "max-height other-tabs"):
558
  gr.Markdown("""
559
  | Source | Report | URL | Number of pages | Release date |
560
  | --- | --- | --- | --- | --- |
 
595
  IPBES | Summary for Policymakers. Assessment Report on Land Degradation and Restoration. | https://zenodo.org/record/3237393/files/ipbes_assessment_report_ldra_EN.pdf | 48 | 2018
596
  """)
597
 
598
+ with gr.Tab("Carbon Footprint",elem_classes = "max-height other-tabs"):
599
  gr.Markdown("""
600
 
601
  Carbon emissions were measured during the development and inference process using CodeCarbon [https://github.com/mlco2/codecarbon](https://github.com/mlco2/codecarbon)
 
612
  """
613
  )
614
 
615
+ with gr.Tab("Changelog",elem_classes = "max-height other-tabs"):
616
  gr.Markdown("""
617
+
618
+ ##### Upcoming features
619
+ - Figures retrieval
620
+ - Conversational chat
621
+ - Intent routing
622
+ - Report filtering
623
+
624
+ ##### v1.2.0 - *2023-11-27*
625
+ - Added new IPBES assessment on Invasive Species (SPM and chapters)
626
+ - Switched all the codebase to LCEL (Langchain Expression Language)
627
+ - Added sample questions by category
628
+ - Switched embeddings from old ``sentence-transformers/multi-qa-mpnet-base-dot-v1`` to ``BAAI/bge-base-en-v1.5``
629
 
630
  ##### v1.1.0 - *2023-10-16*
631
  - ClimateQ&A on Hugging Face is finally working again with all the new features !
 
642
  """
643
  )
644
 
645
+ # demo.queue(concurrency_count=16)
646
 
647
  demo.launch()