Spaces:
Running
Running
File size: 3,513 Bytes
38585cf bdbadf6 38585cf 218d226 38585cf 218d226 38585cf 4af69e2 218d226 bdbadf6 38585cf 218d226 38585cf ccee973 38585cf 218d226 ccee973 38585cf 218d226 38585cf 218d226 38585cf 218d226 992104c ab5d80d 38585cf 4af69e2 218d226 4af69e2 ab5d80d 4af69e2 be701db ab5d80d 38585cf 218d226 ab5d80d bdbadf6 218d226 38585cf 218d226 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import time
import os
from typing import Literal, Tuple
import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
import meilisearch
# Load the BGE base English embedding model; queries are embedded locally with it.
tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-base-en-v1.5")
model = AutoModel.from_pretrained("BAAI/bge-base-en-v1.5")
model.eval()
# NOTE(review): CUDA availability is only logged here — the model is never moved
# to GPU, so inference runs on CPU regardless.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
# Meilisearch client for the pre-built docs embedding index.
# Requires the MEILISEARCH_KEY environment variable (raises KeyError if unset).
meilisearch_client = meilisearch.Client(
    "https://edge.meilisearch.com", os.environ["MEILISEARCH_KEY"]
)
meilisearch_index_name = "docs-embed"
meilisearch_index = meilisearch_client.index(meilisearch_index_name)
# Choices for the UI radio button selecting the result format.
output_options = ["RAG-friendly", "human-friendly"]
def search_embeddings(
    query_text: str, output_option: Literal["RAG-friendly", "human-friendly"]
) -> Tuple[str, str]:
    """Embed *query_text*, run a semantic search on Meilisearch, format the hits.

    Args:
        query_text: Free-text query entered by the user.
        output_option: "RAG-friendly" returns the raw hit texts joined by a
            separator; "human-friendly" returns markdown with timing stats and
            a per-hit source link.

    Returns:
        Tuple of (results text/markdown, comma-separated markdown source links).

    Raises:
        ValueError: If ``output_option`` is not one of the two known options
            (previously this fell through and silently returned ``None``).
    """
    # step 1: tokenize the query and embed it with the BGE model.
    # perf_counter is monotonic — the right clock for measuring intervals.
    start_time_embedding = time.perf_counter()
    # BGE retrieval models expect this instruction prefix on queries.
    query_prefix = "Represent this sentence for searching code documentation: "
    query_tokens = tokenizer(
        query_prefix + query_text,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=512,
    )
    with torch.no_grad():
        # Compute token embeddings; take the CLS-position vector as the
        # sentence embedding, then L2-normalize it for cosine-style search.
        model_output = model(**query_tokens)
        sentence_embeddings = torch.nn.functional.normalize(
            model_output[0][:, 0], p=2, dim=1
        )
    sentence_embeddings_list = sentence_embeddings[0].tolist()
    elapsed_time_embedding = time.perf_counter() - start_time_embedding

    # step 2: pure-vector search on Meilisearch
    # (empty keyword query + semanticRatio 1.0 -> fully semantic).
    start_time_meilisearch = time.perf_counter()
    response = meilisearch_index.search(
        "",
        opt_params={
            "vector": sentence_embeddings_list,
            "hybrid": {"semanticRatio": 1.0},
            "limit": 5,
            "attributesToRetrieve": [
                "text",
                "source_page_url",
                "source_page_title",
                "library",
            ],
        },
    )
    elapsed_time_meilisearch = time.perf_counter() - start_time_meilisearch
    hits = response["hits"]
    sources_md = ", ".join(
        f"[\"{hit['source_page_title']}\"]({hit['source_page_url']})" for hit in hits
    )

    # step 3: present the results in the requested format.
    if output_option == "human-friendly":
        md = f"Stats:\n\nembedding time: {elapsed_time_embedding:.2f}s\n\nmeilisearch time: {elapsed_time_meilisearch:.2f}s\n\n---\n\n"
        for hit in hits:
            text, source_page_url, source_page_title = (
                hit["text"],
                hit["source_page_url"],
                hit["source_page_title"],
            )
            source = f'src: ["{source_page_title}"]({source_page_url})'
            md += text + f"\n\n{source}\n\n---\n\n"
        return md, sources_md
    elif output_option == "RAG-friendly":
        hit_text_str = "\n------------\n".join(hit["text"] for hit in hits)
        return hit_text_str, sources_md
    # Fail loudly on an unexpected option instead of implicitly returning None.
    raise ValueError(f"Unknown output_option: {output_option!r}")
# Gradio UI: a textbox for the query plus a radio selecting the output format;
# both outputs (results, source links) are rendered as markdown.
demo = gr.Interface(
    fn=search_embeddings,
    inputs=[
        gr.Textbox(
            label="enter your query", placeholder="Type Markdown here...", lines=10
        ),
        gr.Radio(
            label="Select an output option",
            choices=output_options,
            value="RAG-friendly",
        ),
    ],
    outputs=[gr.Markdown(), gr.Markdown()],
    title="HF Docs Embeddings Explorer",
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()
|