Spaces:
Running
Running
as-cle-bert
committed on
Update QdrantRag.py
Browse files
- QdrantRag.py +0 -45
QdrantRag.py
CHANGED
@@ -21,10 +21,6 @@ qdrant_client = QdrantClient(url=os.getenv("qdrant_url"), api_key=os.getenv("qdr
|
|
21 |
sparse_encoder = SparseTextEmbedding(model_name="prithivida/Splade_PP_en_v1")
|
22 |
co = cohere.ClientV2(os.getenv("cohere_api_key"))
|
23 |
|
24 |
-
dataset = load_dataset("Karbo31881/Pokemon_images")
|
25 |
-
ds = dataset["train"]
|
26 |
-
labels = ds["text"]
|
27 |
-
|
28 |
def get_sparse_embedding(text: str, model: SparseTextEmbedding):
|
29 |
embeddings = list(model.embed(text))
|
30 |
vector = {f"sparse-text": models.SparseVector(indices=embeddings[0].indices, values=embeddings[0].values)}
|
@@ -169,44 +165,3 @@ class NeuralSearcher:
|
|
169 |
)
|
170 |
payloads = [hit.payload["label"] for hit in search_result]
|
171 |
return payloads
|
172 |
-
|
173 |
-
qdrant_client.recreate_collection(
|
174 |
-
collection_name="pokemon_texts",
|
175 |
-
vectors_config={"dense-text": models.VectorParams(
|
176 |
-
size=768, # Vector size is defined by used model
|
177 |
-
distance=models.Distance.COSINE,
|
178 |
-
)},
|
179 |
-
sparse_vectors_config={"sparse-text": models.SparseVectorParams(
|
180 |
-
index=models.SparseIndexParams(
|
181 |
-
on_disk=False
|
182 |
-
)
|
183 |
-
)}
|
184 |
-
)
|
185 |
-
textdata = load_dataset("wanghaofan/pokemon-wiki-captions")
|
186 |
-
names = textdata["train"]["name_en"]
|
187 |
-
texts = textdata["train"]["text_en"]
|
188 |
-
|
189 |
-
c = 0
|
190 |
-
|
191 |
-
for j in range(len(texts)):
|
192 |
-
txt = names[j].upper() + "\n\n" + texts[j]
|
193 |
-
l = c+1
|
194 |
-
upload_text_to_qdrant(qdrant_client, "pokemon_texts", encoder, txt, c, l)
|
195 |
-
c = l+1
|
196 |
-
|
197 |
-
qdrant_client.recreate_collection(
|
198 |
-
collection_name="pokemon_images",
|
199 |
-
vectors_config=models.VectorParams(
|
200 |
-
size=1024, # Vector size is defined by used model
|
201 |
-
distance=models.Distance.COSINE,
|
202 |
-
),
|
203 |
-
)
|
204 |
-
upload_images_to_qdrant(qdrant_client, "pokemon_images", "data/vector_pokemon.npy", labels)
|
205 |
-
|
206 |
-
qdrant_client.recreate_collection(
|
207 |
-
collection_name="semantic_cache",
|
208 |
-
vectors_config=models.VectorParams(
|
209 |
-
size=768, # Vector size is defined by used model
|
210 |
-
distance=models.Distance.COSINE,
|
211 |
-
),
|
212 |
-
)
|
|
|
21 |
sparse_encoder = SparseTextEmbedding(model_name="prithivida/Splade_PP_en_v1")
|
22 |
co = cohere.ClientV2(os.getenv("cohere_api_key"))
|
23 |
|
|
|
|
|
|
|
|
|
24 |
def get_sparse_embedding(text: str, model: SparseTextEmbedding):
|
25 |
embeddings = list(model.embed(text))
|
26 |
vector = {f"sparse-text": models.SparseVector(indices=embeddings[0].indices, values=embeddings[0].values)}
|
|
|
165 |
)
|
166 |
payloads = [hit.payload["label"] for hit in search_result]
|
167 |
return payloads
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|