"""Build and save a nearest-neighbour index over Nepali article embeddings.

The script takes the first ``SAMPLE_SIZE`` rows of a Hugging Face dataset
(loaded with ``streaming=True``), encodes each row's ``"Article"`` field
with a SentenceTransformer model, adds the embeddings and their source
texts to an easyknn builder, and saves the resulting index to
``INDEX_PATH``.
"""

import datasets
import easyknn
from sentence_transformers import SentenceTransformer

DATASET_NAME = "mridul3301/nepali-text-corpus-64"
MODEL_NAME = "jangedoo/all-MiniLM-L6-v2-nepali"
TEXT_COLUMN = "Article"
SAMPLE_SIZE = 5000
INDEX_PATH = "./data/knn_index"

# Load the train split in streaming mode and keep only the first
# SAMPLE_SIZE rows.
ds = datasets.load_dataset(DATASET_NAME, split="train", streaming=True)
ds = ds.take(SAMPLE_SIZE)

model = SentenceTransformer(MODEL_NAME)

# Materialize the article texts; the same list is passed both to the encoder
# and to the index builder as the items paired with the embeddings.
texts = [record[TEXT_COLUMN] for record in ds]

# Request normalized embeddings returned as a NumPy array.
# NOTE(review): normalization presumably matches the similarity metric used
# by the FAISS-backed index -- confirm against easyknn's defaults.
embeddings = model.encode(
    texts,
    normalize_embeddings=True,
    convert_to_numpy=True,
    show_progress_bar=True,
)

builder = easyknn.EmbeddingsIndexBuilder()
builder.add(embeddings=embeddings, items=texts)

knn = easyknn.EasyKNN.from_builder_with_faiss(builder=builder)
# NOTE(review): SOURCE does not show whether save() creates "./data" when it
# is missing -- verify before running in a fresh checkout.
knn.save(INDEX_PATH)