|
import os

import gradio as gr
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import HuggingFaceLLM
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer
from transformers import RagTokenizer, RagSequenceForGeneration
|
|
|
|
|
# --- Configuration ---------------------------------------------------------

# Password used to gate the UI against bots; may be None if the env var is
# not set — consumers must handle that case.
ANTI_BOT_PW = os.getenv("CORRECT_VALIDATE")

# Expose the Hugging Face read token to the hub client libraries.
# Fix: assigning None into os.environ raises TypeError when HF_READ is not
# set — only export the token when it is actually present.
_hf_token = os.getenv("HF_READ")
if _hf_token is not None:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# Location of the persisted Chroma index.
# Fix: the original referenced PATH_WORK / CHROMA_DIR without ever defining
# them (NameError at import time); default to the working directory and make
# both overridable through the environment.
PATH_WORK = os.getenv("PATH_WORK", ".")
CHROMA_DIR = os.getenv("CHROMA_DIR", "/chroma")

# --- Models ----------------------------------------------------------------

# Sentence-embedding model used to embed queries/documents for retrieval.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# RAG generator (facebook/rag-sequence-nq): tokenizer + seq2seq model.
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq", use_auth_token=True)
model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", use_auth_token=True)

# Vector store over the persisted index.
# Fix: langchain's Chroma constructor takes `embedding_function=`, not
# `embedding_model=` (the original kwarg would be rejected).
chroma_db = Chroma(embedding_function=embedding_model, persist_directory=PATH_WORK + CHROMA_DIR)

# Wrap the HF model so it can be invoked like a langchain LLM.
# NOTE(review): `HuggingFaceLLM` is not a standard langchain export
# (`HuggingFacePipeline` is the usual wrapper) — confirm against the
# installed langchain version.
llm = HuggingFaceLLM(model=model, tokenizer=tokenizer)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def document_retrieval_chroma2():
    """Open the persisted Chroma vector store and return a handle to it.

    The embedding function must match the model that was used when the
    index was built (MiniLM, CPU-hosted).

    Returns:
        Chroma: a ready-to-query vector-store instance backed by the
        directory PATH_WORK + CHROMA_DIR.
    """
    # Fix: HuggingFaceInstructEmbeddings was used without being imported
    # (NameError on first call); the import is now at the top of the file.
    embeddings = HuggingFaceInstructEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
    )

    db = Chroma(embedding_function=embeddings, persist_directory=PATH_WORK + CHROMA_DIR)
    print("Chroma DB bereit ...................")

    return db
|
|
|
|
|
|
|
def get_rag_response(question):
    """Retrieve relevant passages for *question* and generate an answer.

    Args:
        question: the user's query string.

    Returns:
        dict with keys:
            "answer": the generated answer string;
            "documents": list of {"link", "passage"} dicts describing the
            retrieved source documents.
    """
    # Fix: langchain's Chroma has no `.search(q, top_k=)` — the API is
    # `similarity_search(query, k=...)`, which returns Document objects.
    docs = chroma_db.similarity_search(question, k=5)

    # Fix: results are langchain Documents, not dicts — the text lives in
    # `.page_content` and the source URL (if present) in `.metadata`.
    passages = [doc.page_content for doc in docs]
    links = [doc.metadata.get('url', 'No URL available') for doc in docs]

    # Fix: LLM wrappers are called with a single prompt string, not
    # (question, docs) — fold the retrieved passages into the prompt.
    context = "\n\n".join(passages)
    answer = llm(f"Context:\n{context}\n\nQuestion: {question}\nAnswer:")

    response = {
        "answer": answer,
        "documents": [{"link": link, "passage": passage}
                      for link, passage in zip(links, passages)],
    }

    return response
|
|
|
|
|
|
|
def chatbot_response(user_input, chat_history=None):
    """Answer *user_input* via RAG and append the exchange to the history.

    Args:
        user_input: the user's question.
        chat_history: existing list of (user, bot) tuples; a fresh list is
            used when omitted.

    Returns:
        (chat_history, chat_history): the updated history twice, matching
        the two Gradio outputs it is wired to.
    """
    # Fix: the original used a mutable default argument (chat_history=[]),
    # which would silently share one history list across all calls.
    if chat_history is None:
        chat_history = []

    response = get_rag_response(user_input)
    answer = response['answer']
    documents = response['documents']

    doc_links = "\n\n".join([f"Link: {doc['link']} \nAuszüge der Dokumente: {doc['passage']}" for doc in documents])
    bot_response = f"{answer} \n\nRelevante Dokumente: \n{doc_links}"

    # Fix: typo `user_inptu` raised a NameError on every call.
    chat_history.append((user_input, bot_response))

    return chat_history, chat_history
|
|
|
|
|
|
|
|
|
def user(user_input, history):
    """Echo the pending user turn into the chat and clear the textbox.

    Returns an empty string (resets the input field) together with a new
    history list whose last entry is [user_input, None] — the None slot is
    filled in later by the bot step.
    """
    updated_history = history + [[user_input, None]]
    return "", updated_history
|
|
|
# --- Gradio UI wiring (module-level script) ---------------------------------
with gr.Blocks() as chatbot:
    # Chat transcript, text input, and a button to clear the conversation.
    chat_interface = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Löschen")

    # On submit: `user` echoes the message into the chat and clears the
    # textbox; the chained `.then` step generates the bot reply.
    # NOTE(review): `user` returns "" for `msg` before `.then()` fires, so
    # `chatbot_response` likely receives an empty string as user_input —
    # verify; the bot step should probably read the question from the
    # chat history instead of from `msg`.
    msg.submit(user, [msg, chat_interface], [msg, chat_interface], queue=False).then(chatbot_response, [msg, chat_interface], [chat_interface, chat_interface])

    # Clearing emits None, which empties the Chatbot component.
    clear.click(lambda: None, None, chat_interface, queue=False)

# Start the Gradio server (blocking call).
chatbot.launch()