from datasets import load_dataset, load_from_disk
from huggingface_hub import hf_hub_download
import faiss  # faiss must be installed for load_faiss_index to work

# download the prebuilt faiss index file and get its local path
# https://huggingface.co/docs/huggingface_hub/guides/download#from-latest-version
path2 = hf_hub_download(repo_id="JosueElias/pipeline_faiss", filename="faiss.index", repo_type="dataset")

# load the wikipedia dataset https://huggingface.co/docs/datasets/loading#hugging-face-hub
datasetx = load_dataset("JosueElias/pipeline_dataset2")

# save the wikipedia dataset locally https://huggingface.co/docs/datasets/process#save
datasetx.save_to_disk("./directory")

# delete the variable to free memory
del datasetx

# reload the train split from disk in Arrow format
datasetx = load_from_disk("./directory/train")

# attach the faiss index to the dataset's "embeddings" column
datasetx.load_faiss_index("embeddings", path2)
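
# Usage sketch (not from the original source): once the index is attached, the
# dataset can be queried by vector similarity with datasets' get_nearest_examples.
# The embedding model and the "title" column below are assumptions -- use the
# same model that originally produced the "embeddings" column, otherwise the
# similarity scores will be meaningless.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model, swap in the real one
query_embedding = model.encode("What is the capital of France?")

# returns the k nearest rows and their similarity scores
scores, examples = datasetx.get_nearest_examples("embeddings", query_embedding, k=5)
for score, title in zip(scores, examples["title"]):  # "title" column is an assumption
    print(score, title)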