Carto-RSE / partie_prenante_carte.py
Ilyas KHIAT
cartographie
b7289c6
raw
history blame
7.17 kB
import streamlit as st
import pandas as pd
import numpy as np
import re
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from langchain.llms import HuggingFaceHub
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts.prompt import PromptTemplate
import altair as alt
from session import set_partie_prenante
import os
from streamlit_vertical_slider import vertical_slider
load_dotenv()
def get_docs_from_website(urls):
    """Load the given web pages through LangChain's ``WebBaseLoader``.

    Args:
        urls: URL (or list of URLs) handed straight to ``WebBaseLoader``.

    Returns:
        The documents produced by the loader's ``load()`` call.
    """
    # The loader is given a desktop-Chrome User-Agent header
    # (presumably so sites do not reject the default client - verify).
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
    }
    return WebBaseLoader(urls, header_template=browser_headers).load()
def get_doc_chunks(docs):
    """Split loaded documents into chunks using a semantic splitter.

    Args:
        docs: Documents to split, as returned by the website loader.

    Returns:
        The chunks returned by ``SemanticChunker.split_documents``.
    """
    # A fixed-size alternative (RecursiveCharacterTextSplitter with
    # chunk_size=500, chunk_overlap=100) was left disabled in favour of
    # the embedding-based SemanticChunker used below.
    semantic_splitter = SemanticChunker(OpenAIEmbeddings())
    return semantic_splitter.split_documents(docs)
def disp_test():
    """Render a Streamlit scatter chart of random data (20 rows, columns a/b/c)."""
    random_values = np.random.randn(20, 3)
    frame = pd.DataFrame(random_values, columns=["a", "b", "c"])
    st.scatter_chart(frame)
def get_vectorstore_from_docs(doc_chunks):
    """Index document chunks in a FAISS vector store.

    Args:
        doc_chunks: Document chunks to embed and index.

    Returns:
        The FAISS vector store built from ``doc_chunks`` with the
        ``text-embedding-3-large`` OpenAI embedding model.
    """
    return FAISS.from_documents(
        documents=doc_chunks,
        embedding=OpenAIEmbeddings(model="text-embedding-3-large"),
    )
def get_conversation_chain(vectorstore):
    """Assemble the retrieval-augmented chain used to answer a question.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.

    Returns:
        A runnable chain: the input is passed through unchanged as
        ``question`` and also given to the retriever to produce ``context``;
        both feed the ``rlm/rag-prompt`` hub prompt, whose output goes to
        the chat model.
    """
    chat_model = ChatOpenAI(model="gpt-4o", temperature=0.5, max_tokens=2048)
    doc_retriever = vectorstore.as_retriever()
    rag_prompt = hub.pull("rlm/rag-prompt")

    # The plain dict is coerced into a parallel runnable when piped into the prompt.
    chain_inputs = {"context": doc_retriever, "question": RunnablePassthrough()}
    return chain_inputs | rag_prompt | chat_model
# Fill in the prompt for the *question* variable that is later inserted into the RAG prompt. Careful: this is not the RAG prompt itself.
def fill_promptQ_template(input_variables, template):
    """Fill the question template with the brand name and description.

    Args:
        input_variables: Mapping that must contain the keys ``"BRAND_NAME"``
            and ``"BRAND_DESCRIPTION"``.
        template: Template text using the ``{BRAND_NAME}`` and
            ``{BRAND_DESCRIPTION}`` placeholders.

    Returns:
        The formatted question string.

    Raises:
        KeyError: If either key is missing from ``input_variables``.
    """
    question_prompt = PromptTemplate(
        input_variables=["BRAND_NAME", "BRAND_DESCRIPTION"],
        template=template,
    )
    brand_name = input_variables["BRAND_NAME"]
    brand_description = input_variables["BRAND_DESCRIPTION"]
    return question_prompt.format(
        BRAND_NAME=brand_name,
        BRAND_DESCRIPTION=brand_description,
    )
# Module-level copy of the stakeholder-extraction question template (French
# text, placeholders BRAND_NAME and BRAND_DESCRIPTION). extract_pp defines a
# local template with the same name, so in the code visible here this
# module-level value is not the one that gets formatted.
template_extraction_PP = '''
Objectif : identifiez et proposez tout les noms de marques qui serviront comme partie prenante de la marque suivante pour développer un marketing de coopération (co-op marketing)
Le nom de la marque de référence est le suivant : {BRAND_NAME}
Son activité est la suivante : {BRAND_DESCRIPTION}
TA REPONSE DOIT ETRE SOUS FORME DE LISTE DE NOMS DE MARQUES
'''
# Don't forget to pass in the input variables from the main function.
def text_to_list(text):
    """Parse a bulleted "name score" listing into ``[name, digits]`` pairs.

    Every ``"- "`` sequence is removed from ``text``, which is then split
    into lines. On each non-blank line the last whitespace-separated token
    is treated as the score and reduced to its digits; the preceding tokens,
    joined by single spaces, form the name.

    Blank lines (including the one produced by a trailing newline, or an
    entirely empty ``text``) are skipped instead of raising ``IndexError``.

    Args:
        text: Multi-line string, one item per line.

    Returns:
        A list of two-element lists ``[name, score_digits]``, both strings.
        A line with a single token yields ``["", <digits of that token>]``.
    """
    items = []
    for raw_line in text.replace("- ", "").split('\n'):
        tokens = raw_line.split()
        if not tokens:
            # Nothing to parse on a blank line; indexing tokens[-1] would fail.
            continue
        *name_tokens, score_token = tokens
        # Keep only the digits of the trailing token (e.g. "85%" -> "85").
        items.append([' '.join(name_tokens), re.sub(r'\D', '', score_token)])
    return items
def extract_pp(urls, input_variables):
    """Ask the RAG chain for stakeholder brand names based on web content.

    The pages at ``urls`` are loaded, chunked and indexed. The question
    template is filled with the brand name and description, and the chain's
    answer is split into one entry per line.

    Args:
        urls: List of page URLs to load as retrieval context.
        input_variables: Mapping with the keys ``"BRAND_NAME"`` and
            ``"BRAND_DESCRIPTION"`` used to fill the question template.

    Returns:
        A list of non-empty strings, one per line of the model's answer,
        with every ``"- "`` sequence removed and surrounding whitespace
        stripped. Blank lines in the answer are dropped so they do not show
        up as empty stakeholder names.
    """
    template_extraction_PP = '''
Objectif : identifiez et proposez tout les noms de marques qui serviront comme partie prenante de la marque suivante pour développer un marketing de coopération (co-op marketing)
Le nom de la marque de référence est le suivant : {BRAND_NAME}
Son activité est la suivante : {BRAND_DESCRIPTION}
TA REPONSE DOIT ETRE SOUS FORME DE LISTE DE NOMS DE MARQUES
'''
    # Build the retrieval pipeline: pages -> chunks -> vector store -> chain.
    docs = get_docs_from_website(urls)
    text_chunks = get_doc_chunks(docs)
    vectorstore = get_vectorstore_from_docs(text_chunks)
    chain = get_conversation_chain(vectorstore)

    question = fill_promptQ_template(input_variables, template_extraction_PP)
    response = chain.invoke(question)

    # A richer variant to consider later: text_to_list(response.content),
    # where each item is [brand name, similarity percentage].
    # Simple variant: one brand name per answer line.
    answer_lines = response.content.replace("- ", "").split('\n')
    stripped_lines = (line.strip() for line in answer_lines)
    return [name for name in stripped_lines if name]
def disp_vertical_slider(partie_prenante):
    """Show one vertical slider per stakeholder, side by side.

    Each slider is placed in its own column, uses the stakeholder name as
    both label and widget key, ranges from 0 to 100 in steps of 1 and starts
    at 50. The current value is written under each slider.

    Args:
        partie_prenante: Sequence of stakeholder names. When it is empty
            only the headings are rendered, because ``st.columns`` does not
            accept a column count of 0.
    """
    number_of_sliders = len(partie_prenante)

    # NOTE(review): Streamlit versions differ on whether set_page_config may
    # be called after other st.* commands - confirm before calling this
    # function from a page that has already rendered elements.
    st.set_page_config(layout="wide")
    st.subheader("Vertical Slider")
    st.title("Vertical Slider")
    st.write("This is a vertical slider example")

    if number_of_sliders == 0:
        return

    # NOTE(review): the name doubles as the widget key, so duplicate names
    # would collide - confirm the caller de-duplicates.
    columns = st.columns(number_of_sliders)
    for column, name in zip(columns, partie_prenante):
        with column:
            slider_value = vertical_slider(
                label=name,
                height=100,
                key=name,
                default_value=50,
                thumb_color="orange",  # Optional - defaults to Streamlit red
                step=1,
                min_value=0,
                max_value=100,
                value_always_visible=False,
            )
            st.write(slider_value)
def display_pp():
    """Render the stakeholder-extraction page.

    The user picks a source, enters a URL plus the brand name and
    description, and presses "Process". The extracted stakeholders are
    sorted, stored through ``set_partie_prenante`` and shown as a table.

    Only the "website" source is handled here; selecting the documents
    option renders nothing further.

    Changes from the previous version:
      * the local ``loaded`` flag was removed - it was reset to False on
        every call, so its check never blocked anything;
      * an empty URL is rejected with a warning instead of being passed to
        the web loader.
    """
    load_dotenv()
    st.header("INDIQUEZ VOS PAGES WEB ET/OU DOCUMENTS D’ENTREPRISE POUR AUDITER LE CONTENU RSE")
    option = st.radio("Source", ("A partir de votre site web", "A partir de vos documents entreprise"))

    if option != "A partir de votre site web":
        return

    url1 = st.text_input("URL 1")
    brand_name = st.text_input("Nom de la marque")
    brand_description = st.text_area("Description de la marque")

    if not st.button("Process"):
        return

    url = url1.strip()
    if not url:
        st.warning("Veuillez renseigner une URL avant de lancer le traitement.")
        return

    with st.spinner("Processing..."):
        input_variables = {"BRAND_NAME": brand_name, "BRAND_DESCRIPTION": brand_description}
        partie_prenante = sorted(extract_pp([url], input_variables))
        set_partie_prenante(partie_prenante)
        st.write(pd.DataFrame(partie_prenante, columns=["Partie prenante"]))

    # TODO: follow-up previously sketched here as commented-out code - plot
    # each stakeholder on an Altair circle chart (x = stakeholder,
    # y = "pouvoir" on a 0-100 scale, default 50) and let the user adjust
    # the values with disp_vertical_slider(partie_prenante).