File size: 843 Bytes
9854175
 
 
b96f334
 
b50e558
f6e7168
9854175
b50e558
 
 
 
f6e7168
9854175
b50e558
9854175
 
b50e558
9854175
 
b50e558
9854175
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23

"""Load the Wikipedia passage dataset and attach a prebuilt FAISS index.

Downloads a serialized FAISS index from the Hugging Face Hub, pulls the
companion dataset, round-trips it through disk so the reloaded copy is
backed by memory-mapped Arrow files, then attaches the index to the
"embeddings" column for nearest-neighbor search.
"""

import faiss  # must be importable so `datasets` can deserialize the index file

from datasets import Dataset, load_dataset, load_from_disk
from huggingface_hub import hf_hub_download

# Fetch the prebuilt FAISS index file from the Hub; returns the local cache path.
# https://huggingface.co/docs/huggingface_hub/guides/download#from-latest-version
path2 = hf_hub_download(
    repo_id="JosueElias/pipeline_faiss",
    filename="faiss.index",
    repo_type="dataset",
)

# Download the Wikipedia dataset (a DatasetDict keyed by split).
# https://huggingface.co/docs/datasets/loading#hugging-face-hub
datasetx = load_dataset("JosueElias/pipeline_dataset2")

# Persist it locally; each split is written to its own subdirectory
# (hence "./directory/train" below).
# https://huggingface.co/docs/datasets/process#save
datasetx.save_to_disk("./directory")

# Drop the in-memory copy to free RAM before reloading.
del datasetx

# Reload only the "train" split; load_from_disk memory-maps the Arrow
# files instead of holding the whole dataset in memory.
datasetx = load_from_disk("./directory/train")

# Attach the FAISS index to the "embeddings" column so the dataset's
# get_nearest_examples / search APIs can be used.
datasetx.load_faiss_index('embeddings', path2)