espejelomar committed on
Commit
ff4ec71
·
1 Parent(s): a6e7feb

add backend code

Browse files
backend/__init__.py ADDED
File without changes
backend/__pycache__/__init__.cpython-36.pyc ADDED
Binary file (159 Bytes). View file
 
backend/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (181 Bytes). View file
 
backend/__pycache__/config.cpython-36.pyc ADDED
Binary file (737 Bytes). View file
 
backend/__pycache__/config.cpython-38.pyc ADDED
Binary file (767 Bytes). View file
 
backend/__pycache__/inference.cpython-36.pyc ADDED
Binary file (2.2 kB). View file
 
backend/__pycache__/inference.cpython-38.pyc ADDED
Binary file (887 Bytes). View file
 
backend/__pycache__/utils.cpython-36.pyc ADDED
Binary file (1.54 kB). View file
 
backend/__pycache__/utils.cpython-38.pyc ADDED
Binary file (1.91 kB). View file
 
backend/inference.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from backend.utils import load_model, load_embeddings, load_texts
3
+
4
+ # Search
5
def query_search(query: str, n_answers: int, model_name: str):
    """Return the corpus rows whose embeddings are most similar to ``query``.

    The query is encoded with the sentence-embedding model named by
    ``model_name`` and compared, by cosine similarity, against the
    pre-computed corpus embeddings.

    Args:
        query: Free-text search string to encode.
        n_answers: Number of best-matching rows to return (passed to
            ``DataFrame.head``).
        model_name: Identifier handed to ``load_model``.

    Returns:
        A DataFrame with the ``"Description"`` and ``"Code"`` columns of the
        ``n_answers`` highest-scoring rows, ordered from most to least
        similar.
    """
    model = load_model(model_name)

    # Creating embeddings
    query_emb = model.encode(query, convert_to_tensor=True)

    print("loading embedding")
    corpus_emb = load_embeddings()
    corpus_texts = load_texts()

    # The encoder may place the query on a different device than the corpus
    # embeddings; cosine_similarity needs both operands on the same device.
    query_emb = query_emb.to(corpus_emb.device)

    # Getting hits
    # NOTE(review): assumes corpus_emb is 2-D (n_examples, dim) with rows
    # aligned to the rows of corpus_texts -- confirm against the data files.
    hits = torch.nn.functional.cosine_similarity(
        query_emb[None, :], corpus_emb, dim=1, eps=1e-8
    )

    # Score a per-call copy: the loaders hand back shared objects, so writing
    # the column in place would leak state between calls and let concurrent
    # callers overwrite each other's scores.
    scored = corpus_texts.assign(Similarity=hits.tolist())

    ranked = scored.sort_values(by="Similarity", ascending=False)
    return ranked.head(n_answers)[["Description", "Code"]]
backend/utils.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sentence_transformers import SentenceTransformer
3
+ import streamlit as st
4
+ import torch
5
+
6
+
7
@st.cache(allow_output_mutation=True)
def load_model(model_name):
    """Build the ``SentenceTransformer`` identified by ``model_name``.

    The call is wrapped in ``st.cache`` with ``allow_output_mutation=True``.
    """
    # Lazy downloading (presumably the weights are fetched on first use --
    # confirm against the sentence-transformers documentation).
    return SentenceTransformer(model_name)
12
+
13
+
14
@st.cache(allow_output_mutation=True)
def load_embeddings():
    """Load the pre-generated corpus embeddings onto the CPU.

    The object stored in the ``.pt`` file is returned as deserialized by
    ``torch.load``; the call is wrapped in ``st.cache`` with
    ``allow_output_mutation=True``.
    """
    # Path is relative to the process working directory.
    embeddings_path = "./embeddings/descriptions_emb_100000_examples.pt"
    cpu_device = torch.device("cpu")
    return torch.load(embeddings_path, map_location=cpu_device)
22
+
23
+
24
@st.cache(allow_output_mutation=True)
def load_texts():
    """Read the pre-generated texts table from CSV into a DataFrame.

    The call is wrapped in ``st.cache`` with ``allow_output_mutation=True``.
    """
    # Path is relative to the process working directory.
    csv_path = "./data/codesearchnet_100000_examples.csv"
    return pd.read_csv(csv_path)