Spaces:
Sleeping
Sleeping
from langchain_core.runnables import RunnablePassthrough | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_community.chat_models import ChatOllama | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_pinecone import PineconeVectorStore | |
from langchain_community.embeddings import SentenceTransformerEmbeddings | |
import os | |
from dotenv import load_dotenv | |
from langchain.retrievers import BM25Retriever, EnsembleRetriever | |
from kiwipiepy import Kiwi | |
# Read key/value pairs from a local .env file into the process environment
# before anything below looks up configuration via os.getenv().
load_dotenv()

# Single module-level Kiwi analyzer; kiwi_tokenize() reuses this instance
# instead of constructing a new one on every call.
kiwi = Kiwi()
def kiwi_tokenize(text):
    """Tokenize *text* with the shared Kiwi analyzer.

    Returns a list holding the ``form`` attribute of every token yielded by
    ``kiwi.tokenize(text)``, in the order the analyzer produced them.
    """
    forms = []
    for morpheme in kiwi.tokenize(text):
        forms.append(morpheme.form)
    return forms
# embedding_model = SentenceTransformerEmbeddings(model_name='BM-K/KoSimCSE-roberta-multitask', model_kwargs={"trust_remote_code":True}) | |
def retriever(pc, bm25, *, k=4, weights=(0.3, 0.7), search_type=None):
    """Build a hybrid retriever that fuses BM25 and vector-store results.

    Args:
        pc: Object exposing ``as_retriever(...)`` -- presumably a LangChain
            vector store such as ``PineconeVectorStore`` (confirm against the
            caller).
        bm25: Documents handed to ``BM25Retriever.from_documents`` to build
            the keyword index; they are tokenized with ``kiwi_tokenize``.
        k: Number of documents each underlying retriever returns.
            Defaults to 4, the value that was previously hard-coded.
        weights: Ensemble weights in the order ``(bm25, vector)``.
            Defaults to ``(0.3, 0.7)``, the previously hard-coded values.
        search_type: Optional search type forwarded to ``pc.as_retriever``
            (e.g. ``"mmr"``). When ``None`` (the default) the argument is not
            passed at all, so the vector store uses its own default.

    Returns:
        An ``EnsembleRetriever`` combining the BM25 retriever and the
        vector-store retriever with the given weights.
    """
    # Dense side: only forward ``search_type`` when the caller asked for one,
    # so the default call is exactly ``pc.as_retriever(search_kwargs={'k': k})``.
    dense_kwargs = {"search_kwargs": {"k": k}}
    if search_type is not None:
        dense_kwargs["search_type"] = search_type
    dense_retriever = pc.as_retriever(**dense_kwargs)

    # Sparse side: BM25 over the supplied documents, tokenized with Kiwi.
    sparse_retriever = BM25Retriever.from_documents(
        bm25, preprocess_func=kiwi_tokenize
    )
    sparse_retriever.k = k

    # NOTE: an earlier version passed ``search_type="mmr"`` to
    # EnsembleRetriever. That class does not define such a field, so the
    # keyword had no effect on ranking. To actually get MMR, request it on
    # the vector-store retriever through the ``search_type`` parameter above.
    return EnsembleRetriever(
        retrievers=[sparse_retriever, dense_retriever],  # retrievers to combine
        weights=list(weights),  # weight applied to each retriever's results
    )