|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import re |
|
|
|
import streamlit as st |
|
from dotenv import load_dotenv |
|
from PyPDF2 import PdfReader |
|
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter |
|
from langchain_experimental.text_splitter import SemanticChunker |
|
from langchain_community.embeddings import OpenAIEmbeddings |
|
from langchain_community.vectorstores import FAISS |
|
from langchain_community.chat_models import ChatOpenAI |
|
from langchain.llms import HuggingFaceHub |
|
from langchain import hub |
|
from langchain_core.output_parsers import StrOutputParser |
|
from langchain_core.runnables import RunnablePassthrough |
|
from langchain_community.document_loaders import WebBaseLoader |
|
from langchain_core.prompts.prompt import PromptTemplate |
|
import altair as alt |
|
from session import set_partie_prenante |
|
import os |
|
from streamlit_vertical_slider import vertical_slider |
|
|
|
# Load environment variables (e.g. the OpenAI API key read by the
# embeddings/chat clients below) from a local .env file at import time.
load_dotenv()
|
|
|
def get_docs_from_website(urls):
    """Download and parse the given web page(s) into LangChain documents.

    Args:
        urls: A single URL string or a list of URL strings to fetch.

    Returns:
        The list of loaded ``Document`` objects produced by ``WebBaseLoader``.
    """
    # Spoof a desktop Chrome user agent so sites that reject the default
    # crawler UA still serve their content.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
    }
    loader = WebBaseLoader(urls, header_template=headers)
    return loader.load()
|
|
|
def get_doc_chunks(docs):
    """Split documents into semantically coherent chunks.

    Chunk boundaries are chosen by ``SemanticChunker`` using OpenAI
    embeddings, so this call needs a valid OpenAI API key in the
    environment.

    Args:
        docs: Iterable of LangChain ``Document`` objects.

    Returns:
        The list of chunked ``Document`` objects.
    """
    chunker = SemanticChunker(OpenAIEmbeddings())
    return chunker.split_documents(docs)
|
|
|
|
|
def disp_test():
    """Render a demo scatter chart of 20 random samples in columns a/b/c."""
    demo_frame = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"])
    st.scatter_chart(demo_frame)
|
|
|
def get_vectorstore_from_docs(doc_chunks):
    """Embed the chunks and index them in an in-memory FAISS vector store.

    Args:
        doc_chunks: List of LangChain ``Document`` chunks to index.

    Returns:
        A ``FAISS`` vector store built with OpenAI ``text-embedding-3-large``.
    """
    return FAISS.from_documents(
        documents=doc_chunks,
        embedding=OpenAIEmbeddings(model="text-embedding-3-large"),
    )
|
|
|
def get_conversation_chain(vectorstore):
    """Assemble a RAG chain: retriever -> hub "rlm/rag-prompt" -> GPT-4o.

    No output parser is attached, so the chain yields the raw chat message
    (callers read ``.content`` themselves).

    Args:
        vectorstore: A vector store exposing ``as_retriever()``.

    Returns:
        The runnable RAG chain.
    """
    model = ChatOpenAI(model="gpt-4o", temperature=0.5, max_tokens=2048)
    rag_prompt = hub.pull("rlm/rag-prompt")

    chain = (
        {"context": vectorstore.as_retriever(), "question": RunnablePassthrough()}
        | rag_prompt
        | model
    )
    return chain
|
|
|
|
|
def fill_promptQ_template(input_variables, template):
    """Render *template* by substituting every entry of *input_variables*.

    Args:
        input_variables: Mapping of placeholder name -> replacement value,
            e.g. ``{"BRAND_NAME": ..., "BRAND_DESCRIPTION": ...}``.
        template: Prompt text containing ``{PLACEHOLDER}`` slots matching
            the mapping's keys.

    Returns:
        The fully formatted prompt string.
    """
    # Derive the placeholder list from the mapping instead of hard-coding
    # BRAND_NAME/BRAND_DESCRIPTION: the function already received the
    # mapping, so this generalizes it to any template without changing
    # behavior for existing callers.
    prompt = PromptTemplate(input_variables=list(input_variables), template=template)
    return prompt.format(**input_variables)
|
|
|
# Prompt sent to the LLM: ask for candidate partner-brand names (the
# "parties prenantes") for co-op marketing, as a plain list of names.
# Placeholders BRAND_NAME / BRAND_DESCRIPTION are filled by
# fill_promptQ_template.
template_extraction_PP = '''
Objectif : identifiez et proposez tout les noms de marques qui serviront comme partie prenante de la marque suivante pour développer un marketing de coopération (co-op marketing)

Le nom de la marque de référence est le suivant : {BRAND_NAME}
Son activité est la suivante : {BRAND_DESCRIPTION}

TA REPONSE DOIT ETRE SOUS FORME DE LISTE DE NOMS DE MARQUES
'''
|
|
|
|
|
def text_to_list(text):
    """Parse an LLM bullet list of "<name> <score>" lines into pairs.

    Each non-empty line becomes ``[name, score]`` where *score* is the last
    whitespace-separated token with all non-digit characters stripped, and
    *name* is the rest of the line joined back with single spaces.

    Args:
        text: Raw LLM output, typically lines like ``"- Brand 12"``.

    Returns:
        List of two-element lists ``[name, digits_of_last_token]``.
    """
    # Drop bullet markers, then tokenize line by line.
    tokenized_lines = [line.split() for line in text.replace("- ", "").split('\n')]

    # BUG FIX: blank lines (e.g. a trailing newline or an empty line between
    # bullets) used to raise IndexError on ``line[-1]``; skip empty token
    # lists instead of crashing.
    return [
        [' '.join(tokens[:-1]), re.sub(r'\D', '', tokens[-1])]
        for tokens in tokenized_lines
        if tokens
    ]
|
|
|
def extract_pp(urls, input_variables):
    """End-to-end stakeholder extraction pipeline.

    Scrapes the given URLs, chunks and indexes their content, then asks the
    RAG chain for partner-brand names using the module-level
    ``template_extraction_PP`` prompt.

    Args:
        urls: List of web page URLs describing the brand.
        input_variables: Dict with "BRAND_NAME" and "BRAND_DESCRIPTION" keys.

    Returns:
        List of brand-name strings, one per line of the model's answer.
    """
    # Use the module-level template; the previous byte-identical local copy
    # was dead duplication that could silently drift out of sync.
    docs = get_docs_from_website(urls)
    text_chunks = get_doc_chunks(docs)
    vectorstore = get_vectorstore_from_docs(text_chunks)
    chain = get_conversation_chain(vectorstore)

    question = fill_promptQ_template(input_variables, template_extraction_PP)
    response = chain.invoke(question)

    # The model answers as a "- item" bullet list; strip the bullet markers
    # and split into one entry per line.
    partie_prenante = response.content.replace("- ", "").split('\n')

    return partie_prenante
|
|
|
def disp_vertical_slider(partie_prenante):
    """Show one vertical 0-100 slider (default 50) per stakeholder.

    Each stakeholder gets its own column; the slider's current value is
    written directly underneath it.

    Args:
        partie_prenante: List of stakeholder names (used as both slider
            label and widget key — assumes the names are unique).
    """
    slider_count = len(partie_prenante)
    st.set_page_config(layout="wide")
    st.subheader("Vertical Slider")
    st.title("Vertical Slider")
    st.write("This is a vertical slider example")

    columns = st.columns(slider_count)
    for column, stakeholder in zip(columns, partie_prenante):
        with column:
            slider_value = vertical_slider(
                label=stakeholder,
                height=100,
                key=stakeholder,
                default_value=50,
                thumb_color="orange",
                step=1,
                min_value=0,
                max_value=100,
                value_always_visible=False,
            )
            st.write(slider_value)
|
|
|
|
|
def display_pp():
    """Streamlit page: collect brand info, then extract and list stakeholders.

    Currently only the "site web" source path is implemented; the document
    upload option renders no further widgets.
    """
    load_dotenv()
    st.header("INDIQUEZ VOS PAGES WEB ET/OU DOCUMENTS D’ENTREPRISE POUR AUDITER LE CONTENU RSE")
    loaded = False
    option = st.radio("Source", ("A partir de votre site web", "A partir de vos documents entreprise"))

    # Guard clause: nothing more to show for the document-upload source.
    if option != "A partir de votre site web":
        return

    url1 = st.text_input("URL 1")
    brand_name = st.text_input("Nom de la marque")
    brand_description = st.text_area("Description de la marque")

    # NOTE(review): `loaded` is a per-rerun local, so it is always False at
    # this point; if one-shot processing is intended, st.session_state would
    # be needed — confirm before changing.
    if st.button("Process") and not loaded:
        loaded = True
        with st.spinner("Processing..."):
            input_variables = {"BRAND_NAME": brand_name, "BRAND_DESCRIPTION": brand_description}
            stakeholders = sorted(extract_pp([url1], input_variables))
            set_partie_prenante(stakeholders)
            st.write(pd.DataFrame(stakeholders, columns=["Partie prenante"]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|