ShawnAI committed
Commit 32f8fd9 · 1 Parent(s): 3d73654

Update app.py

Files changed (1):
  1. app.py (+31 -17)

app.py CHANGED
@@ -5,7 +5,7 @@ import time
 from langchain import PromptTemplate
 from langchain.llms import OpenAI
 from langchain.chat_models import ChatOpenAI
-from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
+from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings, OpenAIEmbeddings
 from langchain.vectorstores import Pinecone
 from langchain.chains import LLMChain
 from langchain.chains.question_answering import load_qa_chain
@@ -28,12 +28,13 @@ PINECONE_LINK = "[Pinecone](https://www.pinecone.io)"
 LANGCHAIN_LINK = "[LangChain](https://python.langchain.com/en/latest/index.html)"
 
 EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "hkunlp/instructor-large")
-EMBEDDING_LOADER = HuggingFaceInstructEmbeddings
+EMBEDDING_LOADER = os.environ.get("EMBEDDING_LOADER", "HuggingFaceInstructEmbeddings")
+EMBEDDING_LIST = ["HuggingFaceInstructEmbeddings", "HuggingFaceEmbeddings", "OpenAIEmbeddings"]
 
 # return top-k text chunks from vector store
-TOP_K_DEFAULT = 10
+TOP_K_DEFAULT = 15
 TOP_K_MAX = 30
-SCORE_DEFAULT = 0.3
+SCORE_DEFAULT = 0.33
 
 
 BUTTON_MIN_WIDTH = 215
@@ -152,7 +153,7 @@ Answer:"""
 #----------------------------------------------------------------------------------------------------------
 #----------------------------------------------------------------------------------------------------------
 
-def init_model(api_key, emb_name, db_api_key, db_env, db_index):
+def init_model(api_key, emb_name, emb_loader, db_api_key, db_env, db_index):
     try:
         if not (api_key and api_key.startswith("sk-") and len(api_key) > 50):
             return None,MODEL_NULL+DOCS_NULL,None,None,None,None
@@ -173,8 +174,11 @@ def init_model(api_key, emb_name, db_api_key, db_env, db_index):
 
         if not (emb_name and db_api_key and db_env and db_index):
            return api_key,MODEL_DONE+DOCS_NULL,llm_dict,None,None,None
-
-        embeddings = EMBEDDING_LOADER(model_name=emb_name)
+
+        if emb_loader == "OpenAIEmbeddings":
+            embeddings = eval(emb_loader)(openai_api_key=api_key)
+        else:
+            embeddings = eval(emb_loader)(model_name=emb_name)
 
         pinecone.init(api_key = db_api_key,
                       environment = db_env)
@@ -208,7 +212,7 @@ def doc_similarity(query, db, top_k, score):
                                              k=top_k)
     #docsearch = db.as_retriever(search_kwargs={'k':top_k})
     #docs = docsearch.get_relevant_documents(query)
-    # print(docs)
+    print(docs)
     udocs = remove_duplicates(docs, score)
     return udocs
 
@@ -357,14 +361,24 @@ with gr.Blocks(
 
     with gr.Tab(TAB_3):
         with gr.Row():
-            emb_textbox = gr.Textbox(
-                label = "Embedding Model",
-                # show_label = False,
-                value = EMBEDDING_MODEL,
-                placeholder = "Paste Your Embedding Model Repo on HuggingFace",
-                lines=1,
-                interactive=True,
-                type='email')
+            with gr.Column():
+                emb_textbox = gr.Textbox(
+                    label = "Embedding Model",
+                    # show_label = False,
+                    value = EMBEDDING_MODEL,
+                    placeholder = "Paste Your Embedding Model Repo on HuggingFace",
+                    lines=1,
+                    interactive=True,
+                    type='email')
+
+            with gr.Column():
+                emb_dropdown = gr.Dropdown(
+                    EMBEDDING_LIST,
+                    value=EMBEDDING_LOADER,
+                    multiselect=False,
+                    interactive=True,
+                    label="Embedding Loader")
+
        with gr.Accordion("Pinecone Database for "+DOC_1):
            with gr.Row():
                db_api_textbox = gr.Textbox(
@@ -393,7 +407,7 @@
                    interactive=True,
                    type='email')
 
-    init_input = [llm_api_textbox, emb_textbox, db_api_textbox, db_env_textbox, db_index_textbox]
+    init_input = [llm_api_textbox, emb_textbox, emb_dropdown, db_api_textbox, db_env_textbox, db_index_textbox]
     init_output = [llm_api_textbox, model_statusbox,
                    llm, chain_2,
                    vector_db, chatbot]
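
The substance of the commit is that init_model() now receives the loader name chosen in the new emb_dropdown and instantiates the matching class with eval(), passing openai_api_key for OpenAIEmbeddings and model_name for the HuggingFace loaders. A minimal sketch of that selection logic follows, assuming the same LangChain embedding classes imported at the top of app.py; the choose_embeddings() helper is illustrative only and is not part of the commit.

from langchain.embeddings import (
    HuggingFaceEmbeddings,
    HuggingFaceInstructEmbeddings,
    OpenAIEmbeddings,
)

EMBEDDING_LIST = ["HuggingFaceInstructEmbeddings", "HuggingFaceEmbeddings", "OpenAIEmbeddings"]

def choose_embeddings(emb_loader, emb_name, api_key):
    # Hypothetical helper mirroring the branch added to init_model():
    # OpenAIEmbeddings is keyed by the OpenAI API key, while the
    # HuggingFace loaders take a model repo name such as
    # "hkunlp/instructor-large".
    if emb_loader not in EMBEDDING_LIST:
        raise ValueError(f"Unsupported embedding loader: {emb_loader}")
    if emb_loader == "OpenAIEmbeddings":
        return OpenAIEmbeddings(openai_api_key=api_key)
    # Same effect as eval(emb_loader)(model_name=emb_name), resolved
    # through an explicit mapping instead of eval().
    loaders = {
        "HuggingFaceInstructEmbeddings": HuggingFaceInstructEmbeddings,
        "HuggingFaceEmbeddings": HuggingFaceEmbeddings,
    }
    return loaders[emb_loader](model_name=emb_name)

Because the Gradio dropdown only offers the values in EMBEDDING_LIST, the eval(emb_loader) call in the commit is constrained in practice; a dict lookup like the one above simply makes that constraint explicit.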