kastan committed on
Commit
ed62da9
·
1 Parent(s): f910cb6

adding new Pinecone index to demo

Browse files
Files changed (2) hide show
  1. app.py +17 -11
  2. retrieval.py +4 -4
app.py CHANGED
@@ -5,6 +5,7 @@ import retrieval
5
  # UNCOMMENT ONLY WHEN RUNNING LOCALLY (not on Spaces)
6
  # from dotenv import load_dotenv
7
  from text_generation import Client, InferenceAPIClient
 
8
 
9
  # load API keys from globally-available .env file
10
  # SECRETS_FILEPATH = "/mnt/project/chatbotai/huggingface_cache/internal_api_keys.env"
@@ -106,7 +107,7 @@ def predict(
106
  stop_sequences=[user_name.rstrip(), assistant_name.rstrip()],
107
  )
108
 
109
- final_chat_response = None
110
  for i, response in enumerate(iterator):
111
  if response.token.special:
112
  continue
@@ -123,28 +124,33 @@ def predict(
123
  history[-1] = partial_words
124
 
125
  chat = [(history[i].strip(), history[i + 1].strip()) for i in range(0, len(history) - 1, 2)]
126
- final_chat_response = chat
127
  yield chat, history, None, None, None, []
128
 
129
- # Not perfect, but much better at removing all the crazy newlines.
130
- cleaned_final_chat_response = []
131
- for human_chat, bot_chat in final_chat_response:
132
- human_chat = human_chat.replace("<br>", "")
133
- human_chat = human_chat.replace("\n\n", "\n")
134
- bot_chat = bot_chat.replace("<br>", "")
135
- bot_chat = bot_chat.replace("\n\n", "\n")
136
- cleaned_final_chat_response.append( (human_chat, bot_chat) )
137
-
138
  # Pinecone context retrieval
139
  top_context_list = ta.retrieve_contexts_from_pinecone(user_question=inputs, topk=NUM_ANSWERS_GENERATED)
140
  # yield chat, history, top_context_list[0], top_context_list[1], top_context_list[2], []
141
  yield cleaned_final_chat_response, history, top_context_list[0], top_context_list[1], top_context_list[2], []
 
 
142
 
143
  # run CLIP
144
  images_list = ta.clip_text_to_image(inputs)
145
  # yield chat, history, top_context_list[0], top_context_list[1], top_context_list[2], images_list
146
  yield cleaned_final_chat_response, history, top_context_list[0], top_context_list[1], top_context_list[2], images_list
147
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  def reset_textbox():
150
  return gr.update(value="")
 
5
  # UNCOMMENT ONLY WHEN RUNNING LOCALLY (not on Spaces)
6
  # from dotenv import load_dotenv
7
  from text_generation import Client, InferenceAPIClient
8
+ from typing import List, Tuple
9
 
10
  # load API keys from globally-available .env file
11
  # SECRETS_FILEPATH = "/mnt/project/chatbotai/huggingface_cache/internal_api_keys.env"
 
107
  stop_sequences=[user_name.rstrip(), assistant_name.rstrip()],
108
  )
109
 
110
+ chat_response = None
111
  for i, response in enumerate(iterator):
112
  if response.token.special:
113
  continue
 
124
  history[-1] = partial_words
125
 
126
  chat = [(history[i].strip(), history[i + 1].strip()) for i in range(0, len(history) - 1, 2)]
127
+ chat_response = chat
128
  yield chat, history, None, None, None, []
129
 
130
+ cleaned_final_chat_response = clean_chat_response(chat_response)
 
 
 
 
 
 
 
 
131
  # Pinecone context retrieval
132
  top_context_list = ta.retrieve_contexts_from_pinecone(user_question=inputs, topk=NUM_ANSWERS_GENERATED)
133
  # yield chat, history, top_context_list[0], top_context_list[1], top_context_list[2], []
134
  yield cleaned_final_chat_response, history, top_context_list[0], top_context_list[1], top_context_list[2], []
135
+
136
+ cleaned_final_chat_response = clean_chat_response(chat_response)
137
 
138
  # run CLIP
139
  images_list = ta.clip_text_to_image(inputs)
140
  # yield chat, history, top_context_list[0], top_context_list[1], top_context_list[2], images_list
141
  yield cleaned_final_chat_response, history, top_context_list[0], top_context_list[1], top_context_list[2], images_list
142
 
143
+ def clean_chat_response(chat: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
144
+ ''' Not perfect, but much better at removing all the crazy newlines. '''
145
+ cleaned_chat = []
146
+ for human_chat, bot_chat in chat:
147
+ # human_chat = human_chat.replace("<br>", "")
148
+ human_chat = human_chat.replace("\n\n", "\n")
149
+ # bot_chat = bot_chat.replace("<br>", "")
150
+ bot_chat = bot_chat.replace("\n\n", "\n")
151
+ cleaned_chat.append( (human_chat, bot_chat) )
152
+ return cleaned_chat
153
+
154
 
155
  def reset_textbox():
156
  return gr.update(value="")
retrieval.py CHANGED
@@ -47,11 +47,11 @@ class Retrieval:
47
 
48
  def _load_pinecone_vectorstore(self,):
49
  model_name = "intfloat/e5-large" # best text embedding model. 1024 dims.
50
-
51
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
52
- #pinecone.init(api_key=os.environ['PINECONE_API_KEY'], environment="us-west1-gcp")
53
- pinecone.init(api_key=PINECONE_API_KEY, environment="us-west1-gcp")
54
- pincecone_index = pinecone.Index("uiuc-chatbot")
 
55
 
56
  self.vectorstore = Pinecone(index=pincecone_index, embedding_function=embeddings.embed_query, text_key="text")
57
 
 
47
 
48
  def _load_pinecone_vectorstore(self,):
49
  model_name = "intfloat/e5-large" # best text embedding model. 1024 dims.
 
50
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
51
+ # pinecone.init(api_key=os.environ.get('PINECONE_API_KEY'), environment="us-west1-gcp")
52
+ # pincecone_index = pinecone.Index("uiuc-chatbot")
53
+ pinecone.init(api_key=os.environ.get('PINECONE_API_KEY_NEW_ACCT'), environment="us-east4-gcp")
54
+ pincecone_index = pinecone.Index("uiuc-chatbot-deduped")
55
 
56
  self.vectorstore = Pinecone(index=pincecone_index, embedding_function=embeddings.embed_query, text_key="text")
57