# Spaces:
# Sleeping
# Sleeping
# Builds the module-level `datasetx` dataset with a FAISS index attached to
# its 'embeddings' column. Everything runs at import time; `path2` and
# `datasetx` are the names this script leaves in the module namespace.

import faiss  # not referenced directly; presumably required by load_faiss_index below
from datasets import Dataset, load_dataset, load_from_disk
from huggingface_hub import hf_hub_download

# Local directory the Hub dataset is written to before being reloaded.
DATASET_DIR = "./directory"

# Download the FAISS index file from the Hub and keep the local path to it.
# https://huggingface.co/docs/huggingface_hub/guides/download#from-latest-version
path2 = hf_hub_download(
    repo_id="JosueElias/pipeline_faiss",
    filename="faiss.index",
    repo_type="dataset",
)

# Load the wikipedia dataset from the Hub.
# https://huggingface.co/docs/datasets/loading#hugging-face-hub
datasetx = load_dataset("JosueElias/pipeline_dataset2")

# Save the dataset locally.
# https://huggingface.co/docs/datasets/process#save
datasetx.save_to_disk(DATASET_DIR)

# Drop the reference to the loaded dataset to free memory before reloading.
del datasetx

# Reload only the "train" split, in Arrow format, from the saved copy.
datasetx = load_from_disk(f"{DATASET_DIR}/train")

# Attach the downloaded FAISS index to the dataset under the name 'embeddings'.
datasetx.load_faiss_index('embeddings', path2)