as-cle-bert committed on
Commit
454f3c5
·
verified ·
1 Parent(s): 55cdc0e

Update QdrantRag.py

Browse files
Files changed (1) hide show
  1. QdrantRag.py +0 -45
QdrantRag.py CHANGED
@@ -21,10 +21,6 @@ qdrant_client = QdrantClient(url=os.getenv("qdrant_url"), api_key=os.getenv("qdr
21
  sparse_encoder = SparseTextEmbedding(model_name="prithivida/Splade_PP_en_v1")
22
  co = cohere.ClientV2(os.getenv("cohere_api_key"))
23
 
24
- dataset = load_dataset("Karbo31881/Pokemon_images")
25
- ds = dataset["train"]
26
- labels = ds["text"]
27
-
28
  def get_sparse_embedding(text: str, model: SparseTextEmbedding):
29
  embeddings = list(model.embed(text))
30
  vector = {f"sparse-text": models.SparseVector(indices=embeddings[0].indices, values=embeddings[0].values)}
@@ -169,44 +165,3 @@ class NeuralSearcher:
169
  )
170
  payloads = [hit.payload["label"] for hit in search_result]
171
  return payloads
172
-
173
- qdrant_client.recreate_collection(
174
- collection_name="pokemon_texts",
175
- vectors_config={"dense-text": models.VectorParams(
176
- size=768, # Vector size is defined by used model
177
- distance=models.Distance.COSINE,
178
- )},
179
- sparse_vectors_config={"sparse-text": models.SparseVectorParams(
180
- index=models.SparseIndexParams(
181
- on_disk=False
182
- )
183
- )}
184
- )
185
- textdata = load_dataset("wanghaofan/pokemon-wiki-captions")
186
- names = textdata["train"]["name_en"]
187
- texts = textdata["train"]["text_en"]
188
-
189
- c = 0
190
-
191
- for j in range(len(texts)):
192
- txt = names[j].upper() + "\n\n" + texts[j]
193
- l = c+1
194
- upload_text_to_qdrant(qdrant_client, "pokemon_texts", encoder, txt, c, l)
195
- c = l+1
196
-
197
- qdrant_client.recreate_collection(
198
- collection_name="pokemon_images",
199
- vectors_config=models.VectorParams(
200
- size=1024, # Vector size is defined by used model
201
- distance=models.Distance.COSINE,
202
- ),
203
- )
204
- upload_images_to_qdrant(qdrant_client, "pokemon_images", "data/vector_pokemon.npy", labels)
205
-
206
- qdrant_client.recreate_collection(
207
- collection_name="semantic_cache",
208
- vectors_config=models.VectorParams(
209
- size=768, # Vector size is defined by used model
210
- distance=models.Distance.COSINE,
211
- ),
212
- )
 
21
  sparse_encoder = SparseTextEmbedding(model_name="prithivida/Splade_PP_en_v1")
22
  co = cohere.ClientV2(os.getenv("cohere_api_key"))
23
 
 
 
 
 
24
  def get_sparse_embedding(text: str, model: SparseTextEmbedding):
25
  embeddings = list(model.embed(text))
26
  vector = {f"sparse-text": models.SparseVector(indices=embeddings[0].indices, values=embeddings[0].values)}
 
165
  )
166
  payloads = [hit.payload["label"] for hit in search_result]
167
  return payloads