"""Score candidate passages against a query with OpenBMB/UltraRAG-Embedding.

The model is loaded through ``infinity_emb``; the query and the passages are
embedded separately and compared with a plain matrix product.
"""

import asyncio

import numpy as np
from infinity_emb import AsyncEmbeddingEngine, AsyncEngineArray, EngineArgs

# Engine array holding a single engine for the embedding model.
array = AsyncEngineArray.from_args(
    [
        EngineArgs(
            model_name_or_path="OpenBMB/UltraRAG-Embedding",
            engine="torch",
            dtype="float16",
            bettertransformer=False,
            pooling_method="mean",
            trust_remote_code=True,
        ),
    ]
)

# The query asks, in Chinese: "What is the capital of China?"
queries = ["中国的首都是哪里?"]
# Candidate passages: the city names Beijing and Shanghai.
passages = ["beijing", "shanghai"]

# Prefix prepended to every query before embedding; passages are left as-is.
INSTRUCTION = "Query:"
queries = [f"{INSTRUCTION} {query}" for query in queries]


async def embed_text(engine: AsyncEmbeddingEngine, sentences):
    """Embed ``sentences`` with ``engine`` and return the embeddings.

    The engine is entered as an async context manager for the duration of
    the call. ``engine.embed`` yields an ``(embeddings, usage)`` pair; only
    the embeddings are returned and the usage value is discarded.
    """
    async with engine:
        vectors, _usage = await engine.embed(sentences=sentences)
    return vectors


# Each asyncio.run call drives one embed_text coroutine to completion.
queries_embedding = asyncio.run(embed_text(array[0], queries))
passages_embedding = asyncio.run(embed_text(array[0], passages))

# Rows index queries, columns index passages.
query_matrix = np.array(queries_embedding)
passage_matrix = np.array(passages_embedding)
scores = query_matrix @ passage_matrix.T

print(scores.tolist())  # [[0.40356746315956116, 0.36183443665504456]]