from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


def process_documents(
    docs,
    *,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    model_name: str = "LazarusNLP/congen-indo-e5-small",
    device: str = "cpu",
) -> FAISS:
    """Split documents into chunks, embed them, and index them in FAISS.

    Args:
        docs: Documents to index; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` handed to the text splitter.
        chunk_overlap: ``chunk_overlap`` handed to the text splitter.
        model_name: Hugging Face model name used for the embeddings.
        device: Device forwarded to the embedding model through
            ``model_kwargs`` (for example ``"cpu"``).

    Returns:
        The FAISS vector store built from the embedded chunks.

    Calling ``process_documents(docs)`` with no keyword arguments uses the
    same splitter settings, model and device that were previously
    hard-coded.
    """
    # NOTE(review): an empty `docs` yields no chunks; FAISS.from_documents
    # presumably cannot build an index from nothing -- confirm and decide
    # whether callers should guard against it.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    text_chunks = splitter.split_documents(docs)

    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs={"device": device},
    )
    return FAISS.from_documents(text_chunks, embedding=embeddings)