import os

import gradio as gr
from langchain import LLMChain, PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import Chroma
from transformers import pipeline


# Anti-bot password and Chroma persistence paths.
ANTI_BOT_PW = os.getenv("CORRECT_VALIDATE")

PATH_WORK = "."
CHROMA_DIR = "/chroma/kkg"
CHROMA_PDF = './chroma/kkg/pdf'
CHROMA_WORD = './chroma/kkg/word'
CHROMA_EXCEL = './chroma/kkg/excel'


# Hugging Face read token, exported so LangChain components can use it too.
hf_token = os.getenv("HF_READ")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token


# all-MiniLM-L6-v2 wrapped in LangChain's Embeddings interface; Chroma expects
# an Embeddings object, not a raw SentenceTransformer.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


# Extractive QA pipeline used to answer questions over the retrieved passages.
qa_pipeline = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
    tokenizer="distilbert-base-cased-distilled-squad",
    token=hf_token,
)


# Chroma vector store loaded from the persisted directory.
chroma_db = Chroma(embedding_function=embedding_model, persist_directory=PATH_WORK + CHROMA_DIR)


# Seq2seq generation pipeline wrapped as a LangChain LLM. The checkpoint is an
# assumption (google/flan-t5-base stands in for an undefined model name);
# retrieval is not a pipeline() argument and is handled in get_rag_response below.
llm_pipeline = pipeline("text2text-generation", model="google/flan-t5-base", token=hf_token)
llm = HuggingFacePipeline(pipeline=llm_pipeline)
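
# A minimal sketch of how this LLM could be combined with the imported
# PromptTemplate/LLMChain to answer from retrieved context; the prompt wording
# here is an assumption, not part of the original app.
rag_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="Answer the question using only the context.\n\nContext: {context}\n\nQuestion: {question}\nAnswer:",
)
rag_chain = LLMChain(llm=llm, prompt=rag_prompt)
# e.g. rag_chain.run(context="...", question="...")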


def document_retrieval_chroma2():
    # all-MiniLM-L6-v2 is a plain sentence-transformers model, so
    # HuggingFaceEmbeddings is the matching wrapper
    # (HuggingFaceInstructEmbeddings targets instructor-* models).
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
    )

    db = Chroma(embedding_function=embeddings, persist_directory=PATH_WORK + CHROMA_DIR)
    print("Chroma DB ready ...................")

    return db
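
# A hedged sketch of how the store above could back a retriever-based chain;
# RetrievalQA and the k=5 setting are assumptions (the original only hints at
# a `retriever` variable):
#   from langchain.chains import RetrievalQA
#   retriever = document_retrieval_chroma2().as_retriever(search_kwargs={"k": 5})
#   qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
#   qa_chain.run("...")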


def get_rag_response(question):
    # Retrieve the five most similar chunks. Chroma returns LangChain Document
    # objects: the text is in .page_content and the source URL (if stored at
    # ingest time) in .metadata.
    docs = chroma_db.similarity_search(question, k=5)
    passages = [doc.page_content for doc in docs]
    links = [doc.metadata.get('url', 'No URL available') for doc in docs]

    # Concatenate the passages into one context and run extractive QA.
    context = " ".join(passages)
    qa_input = {"question": question, "context": context}
    answer = qa_pipeline(qa_input)['answer']

    response = {
        "answer": answer,
        "documents": [{"link": link, "passage": passage} for link, passage in zip(links, passages)]
    }

    return response
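
# Example call (assumes the persisted store already holds documents):
#   result = get_rag_response("...")
#   print(result["answer"])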


def chatbot_response(chat_history):
    # By the time this runs, `user` below has already appended the new turn
    # and cleared the textbox, so the question is read back from the history
    # rather than from the now-empty input box.
    user_input = chat_history[-1][0]

    response = get_rag_response(user_input)
    answer = response['answer']
    documents = response['documents']

    doc_links = "\n\n".join(f"Link: {doc['link']}\nPassage: {doc['passage']}" for doc in documents)

    # Fill in the bot slot of the latest turn.
    chat_history[-1][1] = f"{answer}\n\nRelevant Documents:\n{doc_links}"
    return chat_history


def user(user_input, history):
    # Append the user's turn with an empty bot slot and clear the textbox.
    return "", history + [[user_input, None]]


with gr.Blocks() as chatbot:
    chat_interface = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # First record the user turn, then generate the bot reply from the
    # updated history.
    msg.submit(user, [msg, chat_interface], [msg, chat_interface], queue=False).then(
        chatbot_response, chat_interface, chat_interface
    )

    clear.click(lambda: None, None, chat_interface, queue=False)

chatbot.launch()