JosueElias committed on
Commit
9854175
·
1 Parent(s): e30de0d

Modifying code of dataset file.

Browse files
Files changed (1) hide show
  1. my_dataset.py +14 -2
my_dataset.py CHANGED
@@ -2,8 +2,20 @@
2
  load dataset:
3
  https://huggingface.co/docs/datasets/loading#hugging-face-hub
4
  '''
 
 
 
5
  from datasets import load_dataset
6
  import faiss
7
-
8
  datasetx = load_dataset("JosueElias/pipeline_dataset2")
9
- datasetx.load_faiss_index('embeddings', 'JosueElias/pipeline_faiss/faiss.index')
 
 
 
 
 
 
 
 
 
 
2
  load dataset:
3
  https://huggingface.co/docs/datasets/loading#hugging-face-hub
4
  '''
5
+
6
+ from datasets import load_from_disk, Dataset
7
+ from huggingface_hub import hf_hub_download
8
  from datasets import load_dataset
9
  import faiss
10
+ # load wikipedia dataset
11
  datasetx = load_dataset("JosueElias/pipeline_dataset2")
12
+ # download the faiss index file and get its local path
13
+ path2 = hf_hub_download(repo_id="JosueElias/pipeline_faiss", filename="faiss.index", repo_type="dataset")
14
+ # save wikipedia dataset locally
15
+ datasetx.save_to_disk("./directory")
16
+ # delete the variable to free memory
17
+ del datasetx
18
+ # load dataset again in arrow format
19
+ datasetx = load_from_disk("./directory/train")
20
+ # load the faiss index file into the dataset under the index name 'embeddings'
21
+ datasetx.load_faiss_index('embeddings', path2)