Spaces:
Sleeping
Sleeping
File size: 765 Bytes
ade3b7e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import duckdb
import numpy as np
from sentence_transformers import SentenceTransformer
from safetensors.numpy import save_file
from tqdm import tqdm
# Embed every row of `definitions.value` from the DuckDB database with the
# LaBSE sentence-transformer and store the stacked embeddings in
# "definitions.safetensors" under the key "vectors".
conn = duckdb.connect("sonajaht.db")
try:
    model = SentenceTransformer("sentence-transformers/LaBSE")
    query = "SELECT value FROM definitions"
    result = conn.execute(query)

    vectors = []
    batch_size = 64

    # No total is passed, so the bar shows a running count of encoded rows.
    # The context manager guarantees the bar is closed even if encoding fails.
    with tqdm() as p_bar:
        # fetchmany() yields an empty (falsy) result once the cursor is
        # exhausted, which ends the loop.
        while chunk := result.fetchmany(batch_size):
            # Each row is a 1-tuple because the query selects one column.
            values = [row[0] for row in chunk]
            vectors.append(
                model.encode(
                    values,
                    show_progress_bar=False,
                    batch_size=batch_size,
                    # NOTE(review): device is hard-coded to "mps"; presumably
                    # this script is only run on Apple-silicon machines —
                    # confirm before running elsewhere.
                    device="mps",
                )
            )
            # Advance by the rows actually processed: the last chunk may be
            # shorter than batch_size, so a fixed step would over-count.
            p_bar.update(len(values))

    if not vectors:
        # np.concatenate([]) would raise an opaque ValueError; fail with the
        # same exception type but an actionable message instead.
        raise ValueError(
            "query returned no rows from 'definitions'; nothing to embed"
        )

    # Join the per-batch results along the first axis into a single array.
    vectors = np.concatenate(vectors)
    save_file({"vectors": vectors}, "definitions.safetensors")
finally:
    # Always release the database handle, including on failure.
    conn.close()
|