momenaca commited on
Commit
e9c0973
·
1 Parent(s): d3ba383

first commit

Browse files
Files changed (8) hide show
  1. .gitignore +7 -0
  2. README.md +4 -4
  3. app.py +249 -0
  4. assets/style.css +361 -0
  5. eki_esrsqa/utils.py +89 -0
  6. poetry.lock +0 -0
  7. pyproject.toml +24 -0
  8. requirements.txt +0 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ .env
2
+ __pycache__/app.cpython-38.pyc
3
+ __pycache__/app.cpython-39.pyc
4
+ __pycache__/utils.cpython-38.pyc
5
+
6
+ notebooks/
7
+ *.pyc
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Esrs Question Answering
3
- emoji:
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 4.36.1
8
  app_file: app.py
 
1
  ---
2
+ title: Eki Esrsqa
3
+ emoji: 🐠
4
+ colorFrom: red
5
+ colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 4.36.1
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import gradio as gr
4
+ from operator import itemgetter
5
+ from pinecone import Pinecone
6
+ from huggingface_hub import whoami
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from langchain.schema.output_parser import StrOutputParser
9
+ from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
10
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
11
+ from langchain_openai import AzureChatOpenAI
12
+ from langchain.prompts.prompt import PromptTemplate
13
+ from langchain.memory import ConversationBufferMemory
14
+ from langchain_community.vectorstores import Pinecone as PineconeVectorstore
15
+ from eki_esrsqa.utils import (
16
+ make_html_source,
17
+ make_pairs,
18
+ _format_chat_history,
19
+ _combine_documents,
20
+ get_llm,
21
+ init_env,
22
+ )
23
+
24
+ logging.basicConfig(level=logging.INFO)
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
# Load environment variables (local .env or the hosting environment) before
# anything below reads them.
init_env()
# get_llm() exports the Azure credentials/endpoint to os.environ and returns a
# ready-to-use chat client.
chat_model_init = get_llm()
demo_name = "ESRS_QA"
hf_model = "BAAI/bge-base-en-v1.5"

# Embedding model used to embed queries for retrieval; embeddings are
# normalized at encode time.
embeddings = HuggingFaceBgeEmbeddings(
    model_name=hf_model,
    encode_kwargs={"normalize_embeddings": True},
)

# Vector store backed by the Pinecone index named in PINECONE_API_INDEX.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index(os.getenv("PINECONE_API_INDEX"))
# NOTE(review): the third positional argument is presumably the metadata key
# holding the passage text - confirm against the vector store signature.
vectorstore = PineconeVectorstore(index, embeddings, "page_content")
# Retrieve the 5 closest passages for each question.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
# Reuse the client built by get_llm() instead of constructing a second,
# identical AzureChatOpenAI instance from the same environment.
chat_model = chat_model_init
43
+ esrs_wiki = """
44
+
45
+ The Corporate Sustainability Reporting Directive (CSRD) is a mandate that requires all companies to report on their sustainability initiatives. In response to this directive, the European Sustainability Reporting Standards (ESRS) were developed. These standards are a key tool in promoting the transition to a sustainable economy within the EU, providing a structured framework for companies to disclose their sustainability initiatives. The ESRS cover a wide range of environmental, social, and governance (ESG) issues, including climate change, biodiversity, and human rights. Companies that adhere to the ESRS can provide investors with valuable insights into their sustainability impact, thereby informing investment decisions. The ESRS are designed to be highly interoperable with global reporting standards, which helps to avoid unnecessary duplication in reporting by companies. The reporting requirements based on the ESRS will be gradually implemented for different companies over time. In summary, the ESRS play a critical role in fostering sustainable finance and enabling companies to demonstrate their commitment to the green deal agenda while accessing sustainable finance.
46
+
47
+ ---
48
+
49
+ """
50
+
51
+ reformulation_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
52
+
53
+ Chat History:
54
+ {chat_history}
55
+ Follow Up Input: {question}
56
+ Standalone question:"""
57
+
58
+ CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(reformulation_template)
59
+
60
+ answering_template = """
61
+ You are an ESG expert, with 20 years experience analyzing corporate sustainability reports.
62
+ You are specialist in the upcoming CSRD regulation and in general with corporate sustainability disclosure requirements.
63
+ {esrs_wiki}
64
+
65
+ You will answer the question based on the following passages extracted from CSRD specific sustainability guidelines and reports:
66
+ ```
67
+ {context}
68
+ ```
69
+
70
+ Guidelines:
71
+ 1. Context: You'll receive relevant excerpts from a CSRD-specific sustainability guideline or report to address a given question.
72
+ 2. Relevance: Only include passages directly pertaining to the question; omit irrelevant content.
73
+ 3. Facts and Figures: Prioritize factual information in your response.
74
+ 4. Conciseness: Keep answers sharp and succinct, avoiding unnecessary context.
75
+ 5. Focus: Address the specific question without veering into related topics.
76
+ 6. Honesty: If unsure, state that you don't know rather than inventing an answer.
77
+ 7. Source Attribution: When using information from a passage, mention it as [Doc i] at the end of the sentence (where 'i' represents the document number).
78
+ 8. Multiple Sources: If the same content appears in multiple documents, cite them collectively (e.g., [Doc i, Doc j, Doc k]).
79
+ 9. Structured Paragraphs: Instead of bullet-point summaries, compile your responses into well-structured paragraphs.
80
+ 10. Method Focus: When addressing "how" questions, emphasize methods and procedures over outcomes.
81
+ 11. Selective Usage: You're not obligated to use every passage; include only those relevant to the question.
82
+ 12. Insufficient Information: If documents lack necessary details, indicate that you don't have enough information.
83
+
84
+ Question: {question}
85
+ Answer:
86
+ """
87
+
88
+ ANSWER_PROMPT = ChatPromptTemplate.from_template(answering_template)
89
+
90
+ DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
91
+ memory = ConversationBufferMemory(
92
+ return_messages=True, output_key="answer", input_key="question"
93
+ )
94
+
95
+ # First we add a step to load memory
96
+ # This adds a "chat_history" key to the input object
97
+ loaded_memory = RunnablePassthrough.assign(
98
+ chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
99
+ )
100
+ # Now we calculate the standalone question
101
+ standalone_question = {
102
+ "standalone_question": {
103
+ "question": lambda x: x["question"],
104
+ "chat_history": lambda x: _format_chat_history(x["chat_history"]),
105
+ }
106
+ | CONDENSE_QUESTION_PROMPT
107
+ | chat_model
108
+ | StrOutputParser(),
109
+ }
110
+ # Now we retrieve the documents
111
+ retrieved_documents = {
112
+ "docs": itemgetter("standalone_question") | retriever,
113
+ "question": lambda x: x["standalone_question"],
114
+ }
115
+ # Now we construct the inputs for the final prompt
116
+ final_inputs = {
117
+ "context": lambda x: _combine_documents(x["docs"], DEFAULT_DOCUMENT_PROMPT),
118
+ "question": itemgetter("question"),
119
+ "esrs_wiki": lambda x: esrs_wiki,
120
+ }
121
+ # And finally, we do the part that returns the answers
122
+ answer = {
123
+ "answer": final_inputs | ANSWER_PROMPT | chat_model,
124
+ "docs": itemgetter("docs"),
125
+ }
126
+ # And now we put it all together!
127
+ final_chain = loaded_memory | standalone_question | retrieved_documents | answer
128
+
129
+
130
async def chat(
    query: str,
    history: list | None = None,
):
    """Stream an answer to ``query`` through the reformulation/retrieval/answering chain.

    Args:
        query: The user's question.
        history: Previous messages of the conversation, as objects exposing a
            ``content`` attribute (they are replayed into the conversation
            memory). Defaults to an empty conversation.

    Yields:
        A 4-tuple matching the Gradio outputs wired to this function:
        ``("", chat pairs in Gradio format, message history, sources HTML)``.
        The empty string clears the input textbox. Intermediate yields carry
        the incoming ``history``; the last yield carries the history reloaded
        from memory after the new exchange has been saved.
    """
    # A fresh list per call: a ``[]`` default would be shared between calls.
    history = [] if history is None else history

    source_string = ""
    gradio_format = make_pairs([a.content for a in history]) + [(query, "")]

    # Reset the conversation memory and replay this session's history into it.
    # NOTE(review): ``memory`` is a module-level object used by every call of
    # this function, so concurrent requests can overwrite each other's
    # history - consider building the memory per request.
    memory.clear()
    for message in history:
        memory.chat_memory.add_message(message)

    inputs = {"question": query}
    result = final_chain.astream_log(inputs)
    # Log paths of interest in the streamed run log. Tokens of the
    # reformulated question ("/logs/AzureChatOpenAI/streamed_output_str/-")
    # are deliberately not surfaced in the UI.
    retriever_path_id = "/logs/Retriever/final_output"
    final_answer_path_id = "/logs/AzureChatOpenAI:2/streamed_output_str/-"  # "/logs/ChatGroq:2/streamed_output_str/-"

    async for patch in result:
        # Inspect every operation of the patch rather than only the first one,
        # which also copes with a patch that carries no operation at all.
        for op in patch.ops:
            path = op["path"]
            if path == retriever_path_id:  # retrieved documents
                sources = op["value"]["documents"]  # List[Document]
                source_string = "\n\n".join(
                    make_html_source(i, doc) for i, doc in enumerate(sources, 1)
                )
            elif path == final_answer_path_id:  # next token of the final answer
                answer_yet = gradio_format[-1][1]
                gradio_format[-1] = (query, answer_yet + op["value"])

        yield "", gradio_format, history, source_string

    # Persist the finished exchange, then emit the updated history.
    memory.save_context(inputs, {"answer": gradio_format[-1][1]})
    yield "", gradio_format, memory.load_memory_variables({})["history"], source_string
173
+
174
+
175
+ with open("./assets/style.css", "r") as f:
176
+ css = f.read()
177
+
178
+
179
def update_visible(oauth_token: gr.OAuthToken | None):
    """Choose which of the three page sections is shown for the current visitor.

    Args:
        oauth_token: Hugging Face OAuth token of the visitor, or ``None`` when
            nobody is logged in.

    Returns:
        A dict mapping ``bloc_1`` (not-logged-in notice), ``bloc_2`` (the chat
        application) and ``bloc_3`` (no-access notice) to visibility updates;
        exactly one of them is made visible.
    """

    def _visibility(show_login, show_app, show_denied):
        # Build the update mapping for the three sections in one place.
        return {
            bloc_1: gr.update(visible=show_login),
            bloc_2: gr.update(visible=show_app),
            bloc_3: gr.update(visible=show_denied),
        }

    # Anonymous visitor: only the "not logged in" notice is shown.
    if oauth_token is None:
        return _visibility(True, False, False)

    org_names = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
    logger.info(org_names)

    # Logged in: show the app to organisation members, the no-access notice otherwise.
    is_member = "ekimetrics-esrsqa" in org_names
    return _visibility(False, is_member, not is_member)
202
+
203
+
204
+ with gr.Blocks(title=f"{demo_name}", css=css) as demo:
205
+ gr.LoginButton()
206
+ gr.Markdown(f"<h1><center>{demo_name}</center></h1>")
207
+
208
+ with gr.Column() as bloc_1:
209
+ textbox_1 = gr.Textbox("You are not logged to Hugging Face !", show_label=False)
210
+
211
+ with gr.Column(visible=False) as bloc_3:
212
+ textbox_3 = gr.Textbox(
213
+ "You are not part of the ESRSQA Project, ask access here : https://huggingface.co/ekimetrics-esrsqa"
214
+ )
215
+
216
+ with gr.Column(visible=False) as bloc_2:
217
+ with gr.Row():
218
+ with gr.Column(scale=2):
219
+ chatbot = gr.Chatbot(
220
+ elem_id="chatbot", label=f"{demo_name} chatbot", show_label=False
221
+ )
222
+ state = gr.State([])
223
+
224
+ with gr.Row():
225
+ ask = gr.Textbox(
226
+ show_label=False,
227
+ placeholder="Input your question then press enter",
228
+ )
229
+
230
+ with gr.Column(scale=1, variant="panel"):
231
+ gr.Markdown("### Sources")
232
+ sources_textbox = gr.Markdown(show_label=False)
233
+
234
+ ask.submit(
235
+ fn=chat,
236
+ inputs=[
237
+ ask,
238
+ state,
239
+ ],
240
+ outputs=[ask, chatbot, state, sources_textbox],
241
+ )
242
+ demo.load(update_visible, inputs=None, outputs=[bloc_1, bloc_2, bloc_3])
243
+
244
+
245
+ demo.launch(
246
+ share=True,
247
+ # auth=("", ""),
248
+ debug=True,
249
+ )
assets/style.css ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --user-image: url('https://miro.medium.com/v2/resize:fit:720/format:webp/1*yfAP6bXjeL2oO3AAMepVOg.png');
3
+ }
4
+
5
+ .warning-box {
6
+ background-color: #fff3cd;
7
+ border: 1px solid #ffeeba;
8
+ border-radius: 4px;
9
+ padding: 15px 20px;
10
+ font-size: 14px;
11
+ color: #856404;
12
+ display: inline-block;
13
+ margin-bottom: 15px;
14
+ }
15
+
16
+
17
+ .tip-box {
18
+ background-color: #f0f9ff;
19
+ border: 1px solid #80d4fa;
20
+ border-radius: 4px;
21
+ margin-top:20px;
22
+ padding: 15px 20px;
23
+ font-size: 14px;
24
+ display: inline-block;
25
+ margin-bottom: 15px;
26
+ width: auto;
27
+ color:black !important;
28
+ }
29
+
30
+ body.dark .warning-box * {
31
+ color:black !important;
32
+ }
33
+
34
+
35
+ body.dark .tip-box * {
36
+ color:black !important;
37
+ }
38
+
39
+
40
+ .tip-box-title {
41
+ font-weight: bold;
42
+ font-size: 14px;
43
+ margin-bottom: 5px;
44
+ }
45
+
46
+ .light-bulb {
47
+ display: inline;
48
+ margin-right: 5px;
49
+ }
50
+
51
+ .gr-box {border-color: #d6c37c}
52
+
53
+ #hidden-message{
54
+ display:none;
55
+ }
56
+
57
+ .message{
58
+ font-size:14px !important;
59
+ }
60
+
61
+
62
+ a {
63
+ text-decoration: none;
64
+ color: inherit;
65
+ }
66
+
67
+ .card {
68
+ background-color: white;
69
+ border-radius: 10px;
70
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
71
+ overflow: hidden;
72
+ display: flex;
73
+ flex-direction: column;
74
+ margin:20px;
75
+ }
76
+
77
+ .card-content {
78
+ padding: 20px;
79
+ }
80
+
81
+ .card-content h2 {
82
+ font-size: 14px !important;
83
+ font-weight: bold;
84
+ margin-bottom: 10px;
85
+ margin-top:0px !important;
86
+ color:#dc2626!important;;
87
+ }
88
+
89
+ .card-content p {
90
+ font-size: 12px;
91
+ margin-bottom: 0;
92
+ }
93
+
94
+ .card-footer {
95
+ background-color: #f4f4f4;
96
+ font-size: 10px;
97
+ padding: 10px;
98
+ display: flex;
99
+ justify-content: space-between;
100
+ align-items: center;
101
+ }
102
+
103
+ .card-footer span {
104
+ flex-grow: 1;
105
+ text-align: left;
106
+ color: #999 !important;
107
+ }
108
+
109
+ .pdf-link {
110
+ display: inline-flex;
111
+ align-items: center;
112
+ margin-left: auto;
113
+ text-decoration: none!important;
114
+ font-size: 14px;
115
+ }
116
+
117
+
118
+
119
+ .message.user{
120
+ /* background-color:#7494b0 !important; */
121
+ border:none;
122
+ /* color:white!important; */
123
+ }
124
+
125
+ .message.bot{
126
+ /* background-color:#f2f2f7 !important; */
127
+ border:none;
128
+ }
129
+
130
+ /* .gallery-item > div:hover{
131
+ background-color:#7494b0 !important;
132
+ color:white!important;
133
+ }
134
+ .gallery-item:hover{
135
+ border:#7494b0 !important;
136
+ }
137
+ .gallery-item > div{
138
+ background-color:white !important;
139
+ color:#577b9b!important;
140
+ }
141
+ .label{
142
+ color:#577b9b!important;
143
+ } */
144
+
145
+ /* .paginate{
146
+ color:#577b9b!important;
147
+ } */
148
+
149
+
150
+
151
+ /* span[data-testid="block-info"]{
152
+ background:none !important;
153
+ color:#577b9b;
154
+ } */
155
+
156
+ /* Pseudo-element for the circularly cropped picture */
157
+ /* .message.bot::before {
158
+ content: '';
159
+ position: absolute;
160
+ top: -10px;
161
+ left: -10px;
162
+ width: 30px;
163
+ height: 30px;
164
+ background-image: var(--user-image);
165
+ background-size: cover;
166
+ background-position: center;
167
+ border-radius: 50%;
168
+ z-index: 10;
169
+ }
170
+ */
171
+
172
+ label.selected{
173
+ background:none !important;
174
+ }
175
+
176
+ #submit-button{
177
+ padding:0px !important;
178
+ }
179
+
180
+
181
+ @media screen and (min-width: 1024px) {
182
+ div#tab-examples{
183
+ height:calc(100vh - 190px) !important;
184
+ overflow-y: auto;
185
+ }
186
+
187
+ div#sources-textbox{
188
+ height:calc(100vh - 190px) !important;
189
+ overflow-y: auto !important;
190
+ }
191
+
192
+ div#tab-config{
193
+ height:calc(100vh - 190px) !important;
194
+ overflow-y: auto !important;
195
+ }
196
+
197
+ div#chatbot-row{
198
+ height:calc(100vh - 90px) !important;
199
+ }
200
+
201
+ div#chatbot{
202
+ height:calc(100vh - 170px) !important;
203
+ }
204
+
205
+ .max-height{
206
+ height:calc(100vh - 90px) !important;
207
+ overflow-y: auto;
208
+ }
209
+
210
+ /* .tabitem:nth-child(n+3) {
211
+ padding-top:30px;
212
+ padding-left:40px;
213
+ padding-right:40px;
214
+ } */
215
+ }
216
+
217
+ footer {
218
+ visibility: hidden;
219
+ display:none !important;
220
+ }
221
+
222
+
223
+ @media screen and (max-width: 767px) {
224
+ /* Your mobile-specific styles go here */
225
+
226
+ div#chatbot{
227
+ height:500px !important;
228
+ }
229
+
230
+ #submit-button{
231
+ padding:0px !important;
232
+ min-width: 80px;
233
+ }
234
+
235
+ /* This will hide all list items */
236
+ div.tab-nav button {
237
+ display: none !important;
238
+ }
239
+
240
+ /* This will show only the first list item */
241
+ div.tab-nav button:first-child {
242
+ display: block !important;
243
+ }
244
+
245
+ /* This will also show the second list item */
246
+ div.tab-nav button:nth-child(2) {
247
+ display: block !important;
248
+ }
249
+
250
+ #right-panel button{
251
+ display: block !important;
252
+ }
253
+
254
+ /* ... add other mobile-specific styles ... */
255
+ }
256
+
257
+
258
+ body.dark .card{
259
+ background-color: #374151;
260
+ }
261
+
262
+ body.dark .card-content h2{
263
+ color:#f4dbd3 !important;
264
+ }
265
+
266
+ body.dark .card-footer {
267
+ background-color: #404652;
268
+ }
269
+
270
+ body.dark .card-footer span {
271
+ color:white !important;
272
+ }
273
+
274
+
275
+ .doc-ref{
276
+ color:#dc2626!important;
277
+ margin-right:1px;
278
+ }
279
+
280
+ .tabitem{
281
+ border:none !important;
282
+ }
283
+
284
+ .other-tabs > div{
285
+ padding-left:40px;
286
+ padding-right:40px;
287
+ padding-top:10px;
288
+ }
289
+
290
+ .gallery-item > div{
291
+ white-space: normal !important; /* Allow the text to wrap */
292
+ word-break: break-word !important; /* Break words to prevent overflow */
293
+ overflow-wrap: break-word !important; /* Break long words if necessary */
294
+ }
295
+
296
+ span.chatbot > p > img{
297
+ margin-top:40px !important;
298
+ max-height: none !important;
299
+ max-width: 80% !important;
300
+ border-radius:0px !important;
301
+ }
302
+
303
+
304
+ .chatbot-caption{
305
+ font-size:11px;
306
+ font-style:italic;
307
+ color:#508094;
308
+ }
309
+
310
+ .ai-generated{
311
+ font-size:11px!important;
312
+ font-style:italic;
313
+ color:#73b8d4 !important;
314
+ }
315
+
316
+ .card-image > .card-content{
317
+ background-color:#f1f7fa !important;
318
+ }
319
+
320
+
321
+
322
+ .tab-nav > button.selected{
323
+ color:#4b8ec3;
324
+ font-weight:bold;
325
+ border:none;
326
+ }
327
+
328
+ .tab-nav{
329
+ border:none !important;
330
+ }
331
+
332
+ #input-textbox > label > textarea{
333
+ border-radius:40px;
334
+ padding-left:30px;
335
+ resize:none;
336
+ }
337
+
338
+ #input-message > div{
339
+ border:none;
340
+ }
341
+
342
+ #dropdown-samples{
343
+ /*! border:none !important; */
344
+ /*! border-width:0px !important; */
345
+ background:none !important;
346
+
347
+ }
348
+
349
+ #dropdown-samples > .container > .wrap{
350
+ background-color:white;
351
+ }
352
+
353
+
354
+ #tab-examples > div > .form{
355
+ border:none;
356
+ background:none !important;
357
+ }
358
+
359
+ .a-doc-ref{
360
+ text-decoration: none !important;
361
+ }
eki_esrsqa/utils.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from typing import Tuple, List
4
+ from dotenv import load_dotenv
5
+ from msal import ConfidentialClientApplication
6
+ from langchain_openai import AzureChatOpenAI
7
+ from langchain.schema import format_document
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def init_env():
    """Load environment variables from a local ``.env`` file when one is available.

    This is best-effort: when no ``.env`` file provides variables (for example
    when the variables are injected by the hosting platform), the process
    environment is used as is and nothing is raised.
    """
    try:
        # load_dotenv() reports whether it set at least one variable; a
        # missing .env file makes it return False rather than raise.
        loaded = load_dotenv()
    except Exception:
        # Keep the original best-effort behaviour, but without a bare
        # ``except`` (which would also swallow KeyboardInterrupt/SystemExit)
        # and without hiding the reason for the failure.
        logger.warning("Could not read local .env file", exc_info=True)
        loaded = False

    # Log only after the attempt, so the message reflects what really happened.
    if loaded:
        logger.info("Loaded local env")
    else:
        logger.info("Loaded HF env")
19
+
20
+
21
def get_token() -> str | None:
    """Request an access token using the client-credentials flow.

    The client id, client secret, tenant id and scope are read from the
    ``CLIENT_ID``, ``CLIENT_SECRET``, ``TENANT_ID`` and ``SCOPE`` environment
    variables.

    Returns:
        The access token, or ``None`` when the identity provider did not
        return one.
    """
    app = ConfidentialClientApplication(
        client_id=os.getenv("CLIENT_ID"),
        client_credential=os.getenv("CLIENT_SECRET"),
        authority=f"https://login.microsoftonline.com/{os.getenv('TENANT_ID')}",
    )
    result = app.acquire_token_for_client(scopes=[os.getenv("SCOPE")])
    if not result:
        return None

    # A failed request yields a dict describing the error and no
    # "access_token" key; report it instead of raising KeyError so the
    # declared ``str | None`` contract holds.
    token = result.get("access_token")
    if token is None:
        logger.error(
            "Token request failed: %s - %s",
            result.get("error"),
            result.get("error_description"),
        )
    return token
30
+
31
+
32
def get_llm():
    """Export the Azure OpenAI settings to the environment and build the chat client.

    Sets ``OPENAI_API_KEY`` to a freshly acquired access token and
    ``AZURE_OPENAI_ENDPOINT`` to a URL assembled from ``OPENAI_API_ENDPOINT``,
    ``DEPLOYMENT_ID`` and ``OPENAI_API_VERSION``, then instantiates
    ``AzureChatOpenAI`` with its default arguments.

    Returns:
        The ``AzureChatOpenAI`` instance.

    Raises:
        RuntimeError: If no access token could be acquired.
    """
    token = get_token()
    if token is None:
        # Assigning None to os.environ would fail with an opaque TypeError;
        # fail early with a message that points at the actual cause.
        raise RuntimeError(
            "Could not acquire an access token; check the CLIENT_ID, "
            "CLIENT_SECRET, TENANT_ID and SCOPE environment variables."
        )

    os.environ["OPENAI_API_KEY"] = token
    os.environ["AZURE_OPENAI_ENDPOINT"] = (
        f"{os.getenv('OPENAI_API_ENDPOINT')}{os.getenv('DEPLOYMENT_ID')}/chat/completions?api-version={os.getenv('OPENAI_API_VERSION')}"
    )

    return AzureChatOpenAI()
39
+
40
+
41
+ def _combine_documents(docs, document_prompt, document_separator="\n\n"):
42
+ doc_strings = [
43
+ f"Document {i}: \n'''\n{format_document(doc, document_prompt)}\n'''"
44
+ for i, doc in enumerate(docs, 1)
45
+ ]
46
+ return document_separator.join(doc_strings)
47
+
48
+
49
+ def _format_chat_history(chat_history: List[Tuple]) -> str:
50
+ turn = 1
51
+ buffer = []
52
+ for dialogue in chat_history:
53
+ buffer.append(("Human: " if turn else "Assistant: ") + dialogue.content)
54
+ turn ^= 1
55
+ return "\n".join(buffer) + "\n"
56
+
57
+
58
def make_pairs(lst):
    """Group a list of even length into consecutive 2-tuples.

    ``[a, b, c, d]`` becomes ``[(a, b), (c, d)]``. A list of odd length raises
    ``IndexError`` because its last element has no partner.
    """
    pairs = []
    for start in range(0, len(lst), 2):
        pairs.append((lst[start], lst[start + 1]))
    return pairs
61
+
62
+
63
def make_html_source(i, doc):
    """Render one retrieved document as an HTML "card" for the sources panel.

    Args:
        i: Number displayed in the card title (``Doc <i>``).
        doc: Object exposing ``page_content`` (the passage text) and
            ``metadata`` (a mapping). ``metadata["source"]`` is required; when
            it equals ``"ESRS"`` the keys ``"ESRS"``, ``"DR"`` and
            ``"Data type"`` are required as well.

    Returns:
        The card markup as a string that starts and ends with a newline.

    Raises:
        KeyError: If a required metadata key is missing.
    """
    metadata = doc.metadata

    # Only the footer differs between ESRS passages and other sources.
    if metadata["source"] == "ESRS":
        footer_spans = [
            f"<span>{metadata['ESRS']} \n</span>",
            f"<span>DR: {metadata['DR']} \n</span>",
            f"<span>Data type: {metadata['Data type']} \n</span>",
        ]
    else:
        footer_spans = [f"<span>Source: {metadata['source']} \n</span>"]

    card_lines = [
        "",
        '<div class="card">',
        '<div class="card-content">',
        # The heading used to be closed with a mismatched </h2> tag.
        f"<h3>Doc {i}</h3>",
        f"<p>{doc.page_content}</p>",
        "</div>",
        '<div class="card-footer">',
        *footer_spans,
        "</div>",
        "</div>",
        "",
    ]
    return "\n".join(card_lines)
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "eki-esrsqa"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Miguel Omenaca Muro <[email protected]>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = ">=3.10,<3.13"
10
+ langchain = "^0.2.5"
11
+ gradio = {extras = ["oauth"], version = "^4.36.1"}
12
+ sentence-transformers = "^3.0.1"
13
+ langchain-community = "^0.2.5"
14
+ msal = "^1.28.1"
15
+ loadenv = "^0.1.1"
16
+ openai = "^1.34.0"
17
+ langchain-openai = "^0.1.8"
18
+ pinecone = "^4.0.0"
19
+ pinecone-client = "^4.1.1"
20
+
21
+
22
+ [build-system]
23
+ requires = ["poetry-core"]
24
+ build-backend = "poetry.core.masonry.api"
requirements.txt ADDED
The diff for this file is too large to render. See raw diff