Spaces:
Running
Running
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.embeddings import HuggingFaceEmbeddings | |
from langchain_community.vectorstores import FAISS | |
def process_documents(
    docs,
    *,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    model_name: str = "LazarusNLP/congen-indo-e5-small",
    device: str = "cpu",
):
    """Split documents into overlapping chunks and index them in FAISS.

    Args:
        docs: Documents to index, in whatever form
            ``RecursiveCharacterTextSplitter.split_documents`` accepts.
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.
        model_name: Hugging Face model identifier used to embed the chunks.
        device: Device string forwarded to the embedding model through
            ``model_kwargs``.

    Returns:
        The FAISS vector store built from the embedded chunks.

    Raises:
        ValueError: If ``docs`` is empty or splitting produces no chunks.
    """
    # Reject empty input up front so the caller gets an actionable message
    # instead of a failure from deep inside the index construction.
    if not docs:
        raise ValueError("process_documents() requires at least one document")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    text_chunks = text_splitter.split_documents(docs)

    # Checked before the embedding model is instantiated: with no chunks
    # there is nothing to embed and nothing to size the index from.
    if not text_chunks:
        raise ValueError("process_documents() produced no text chunks to index")

    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs={"device": device},
    )
    vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
    return vector_store