|
from timeit import default_timer as timer |
|
|
|
import streamlit as st |
|
import chromadb |
|
import pandas as pd |
|
import numpy as np |
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Page header and search controls ----------------------------------------
# Use the full browser width for the page.
st.set_page_config(layout="wide")

st.title("📚 Semantic Search with Vector Database of SNOMED-CT 💡")
st.caption("🔍 Search any SNOMED-CT related description & concept with natural language.")

# Sidebar control: how many results to request (min 10, max 30, default 10).
st.sidebar.title("🔍 Search Setting")
query_number = st.sidebar.slider("Query Numbers", 10, 30, 10)

# Free-text query box, pre-filled with an example so the first render of the
# page already has a query to run.
st.markdown(
    '##### ➡️⌨️ Please input some medical description here, e.g. '
    '"insomnia two nights a week.", "COPD", "Degenerative Joint Disease"'
)
query_text = st.text_input("Input: any medical description snippet", "Type-2 Diabetes")
|
|
|
|
|
@st.cache_resource
def _get_chroma_client():
    """Return the persistent Chroma client for the on-disk SNOMED-CT store.

    Streamlit re-executes the whole script on every widget interaction.
    ``st.cache_resource`` keeps a single client alive across those reruns
    instead of reopening the persistent store each time.
    """
    return chromadb.PersistentClient(path="snomed_ct_id_term_1410k")


# Module-level handles used by the query helper further down the script.
chroma_client = _get_chroma_client()
collection = chroma_client.get_or_create_collection(name="snomed_ct_id_term")

# Banner: how many results will be requested and how many entries the
# collection currently reports via ``count()``.
st.markdown(
    f"##### ➡️Chroma DB will return {query_number}"
    f" related instances from {collection.count()} collections."
)
|
|
|
|
|
|
|
|
|
def query_chroma_db(query_text: str, query_number: int):
    """Query the module-level ``collection`` with a single text snippet.

    Args:
        query_text: Text to search for; sent as a one-element
            ``query_texts`` list.
        query_number: Number of results to request (``n_results``).

    Returns:
        Whatever ``collection.query`` returns, with distances, metadatas and
        documents requested via ``include``. Downstream code in this file
        reads the ``ids``, ``metadatas``, ``distances`` and ``documents``
        keys, each indexed by query position.
    """
    return collection.query(
        query_texts=[query_text],
        n_results=query_number,
        include=["distances", "metadatas", "documents"],
    )
|
|
|
|
|
def get_df_from_chroma_results(results):
    """Flatten the first query's results into a pandas DataFrame.

    Only index ``0`` of each result list is read, i.e. the results for the
    first (here: only) query text.

    Args:
        results: Mapping with ``ids``, ``metadatas``, ``distances`` and
            ``documents`` keys, each a list holding one list per query.

    Returns:
        DataFrame with columns ``ids``, ``concept_ids``, ``distances`` and
        ``documents`` (in that order), one row per returned match.
        ``concept_ids`` holds the metadata ``concept_id`` converted to
        ``str``; rows whose metadata is missing or has no ``concept_id``
        get a missing value instead of raising.
    """
    # A metadata entry may be None or lack the key; keep the row rather than
    # failing the whole table on one incomplete record.
    concept_ids = [
        str(meta['concept_id']) if meta and 'concept_id' in meta else None
        for meta in results['metadatas'][0]
    ]
    return pd.DataFrame({
        'ids': results['ids'][0],
        'concept_ids': concept_ids,
        'distances': results['distances'][0],
        'documents': results['documents'][0],
    })
|
|
|
# --- Run the query and render the results -----------------------------------
# Time only the call into the vector store; building the DataFrame and
# rendering happen after ``end`` is taken.
start = timer()
results = query_chroma_db(query_text, query_number)
end = timer()

# The "{: .6f}" spec (space flag) is kept as-is: six decimals with a leading
# space in place of a sign for non-negative values.
st.markdown("###### ➡️ Query Time : {: .6f} seconds.".format(end - start))
st.divider()

results_df = get_df_from_chroma_results(results)

st.markdown("### 📊 Similar Search Results from Chroma Vector DB")
# Width/height passed by keyword so the two numbers are self-describing.
st.dataframe(results_df, width=1000, height=500)
|
|
|
|