import json
from operator import itemgetter
from typing import Literal

from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.prompt import PromptTemplate
from langchain_core.exceptions import OutputParserException
from langchain_core.pydantic_v1 import BaseModel, Field


class ESRSAnalysis(BaseModel):
    """Analyzing the user query to get ESRS type, sources and intent"""

    # Only `esrs_type` is currently defined on this model. "no_intent" is the
    # value used when the query matches none of the listed standards.
    esrs_type: Literal[
        "ESRS 1",
        "ESRS 2",
        "ESRS E1",
        "ESRS E2",
        "ESRS E3",
        "ESRS E4",
        "ESRS E5",
        "ESRS S1",
        "ESRS S2",
        "ESRS S3",
        "ESRS S4",
        "ESRS G1",
        "no_intent",
    ] = Field(
        description="""The ESRS type that the user query refers to.""",
    )


def make_esrs_intent_chain(llm):
    """Build the chain that classifies a query into a single ESRS type.

    The chain reads the ``"query"`` key of its input mapping, fills the
    classification prompt with it, sends the prompt to ``llm`` and parses the
    model output into an ``ESRSAnalysis`` instance.

    Args:
        llm: The language model runnable placed between the prompt and the
            output parser.

    Returns:
        A runnable taking ``{"query": ...}`` whose result exposes the chosen
        category as ``.esrs_type``.
    """
    prompt_template = """
    Please analyze the question and indicate if it refers to a specific ESRS.

    Follow these definitions in order to choose the appropriate ESRS :
    - ESRS 1 is for questions about general principles for preparing and presenting sustainability information in accordance with CSRD
    - ESRS 2 is for questions about general disclosures related to sustainability reporting, including governance, strategy, impact, risk, opportunity management, and metrics and targets
    - ESRS E1 is for questions about climate change, global warming, GES and energy
    - ESRS E2 is for questions about air, water, and soil pollution, and dangerous substances
    - ESRS E3 is for questions about water and marine resources
    - ESRS E4 is for questions about biodiversity, nature, wildlife and ecosystems
    - ESRS E5 is for questions about resource use and circular economy
    - ESRS S1 is for questions about workforce and labor issues, job security, fair pay, and health and safety
    - ESRS S2 is for questions about workers in the value chain, workers' treatment
    - ESRS S3 is for questions about affected communities, impact on local communities
    - ESRS S4 is for questions about consumers and end users, customer privacy, safety, and inclusion
    - ESRS G1 is for questions about governance, risk management, internal control, and business conduct
    - no_intent is for questions that do not fit into any of the above categories

    Keep in mind these guidelines :
    - Some questions could be related to multiple ESRS. In such case, choose the most appropriate one.

    The output needs to respect a JSON format with 'esrs_type' as the key and the appropriate ESRS as the value.

    Question: '{query}'

    Answer:
    """

    parser = PydanticOutputParser(pydantic_object=ESRSAnalysis)
    prompt = PromptTemplate(template=prompt_template, input_variables=["query"])

    # The leading dict narrows whatever mapping the caller passes down to the
    # single "query" variable that the prompt template expects.
    chain = {"query": itemgetter("query")} | prompt | llm | parser
    return chain


def make_esrs_intent_node(llm):
    """Create a node function that tags a state with its ESRS type.

    Args:
        llm: The language model runnable handed to ``make_esrs_intent_chain``.

    Returns:
        A function ``intent_message(state)`` that reads ``state["query"]`` and
        returns ``{"esrs_type": [<category>]}``, i.e. the predicted category
        wrapped in a one-element list.
    """
    # Build the chain once per node rather than on every invocation: it only
    # depends on `llm`, not on the state being processed.
    categorization_chain = make_esrs_intent_chain(llm)

    def intent_message(state):
        query = state["query"]
        analysis = categorization_chain.invoke({"query": query})
        return {"esrs_type": [analysis.esrs_type]}

    return intent_message


# NOTE(review): disabled draft fields kept for reference. They look like extra
# `ESRSAnalysis` fields (the class docstring mentions "sources and intent");
# confirm before re-enabling.
#
#     intent: str = Field(
#         enum=[
#             "Specific topic",
#             "Implementation reco",
#             "KPI extraction",
#         ],
#         description="""
#         Categorize the user query in one of the following categories,
#
#         Examples:
#         - Specific topic: "What are the specificities of ESRS E1 ?"
#         - Implementation reco: "How should I compute my scope 1 reduction target ?"
#         - KPI extraction: "When will the CSRD be mandatory for my small French company ?"
#         """,
#     )
#
#     sources: str = Field(
#         enum=["ESRS", "External"],
#         description="""
#         Given a user question choose which documents would be most relevant for answering their question,
#
#         - ESRS is for questions about a specific environmental, social or governance topic, as well as CSRD's general principles and disclosures
#         - External is for questions about how to implement the CSRD, or general questions about CSRD's context
#         """,
#     )