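"""Console RAG chatbot over a PDF knowledge base.

Loads HI_Knowledge_Base.pdf with LlamaIndex, embeds it with
BAAI/bge-small-en-v1.5, and streams chat responses from
gpt-3.5-turbo-1106 with conversational memory.
"""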
import utils
import os
import time

import openai
from llama_index import SimpleDirectoryReader, Document, VectorStoreIndex, ServiceContext
from llama_index.llms import OpenAI
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.memory import ChatMemoryBuffer

# TruLens imports are kept for optional evaluation; they are unused in the chat flow below.
from trulens_eval import Tru
from utils import get_prebuilt_trulens_recorder
openai.api_key = utils.get_openai_api_key()

documents = SimpleDirectoryReader(
    input_files=["./raw_documents/HI_Knowledge_Base.pdf"]
).load_data()
# Merge all pages into a single Document so the index chunks the full text contiguously.
document = Document(text="\n\n".join([doc.text for doc in documents]))
### gpt-4-1106-preview
### gpt-3.5-turbo-1106 / gpt-3.5-turbo
print("Initializing GPT 3.5 ..")
llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)

print("Initializing bge-small-en-v1.5 embedding model ..")
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

print("Creating vector store ..")
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents([document], service_context=service_context)
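
# Toggle between a one-shot query engine and a conversational chat engine;
# the chat-engine branch below is the active path.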
if False:
    query_engine = index.as_query_engine(streaming=True)
else:
    memory = ChatMemoryBuffer.from_defaults(token_limit=15000)
    # chat_engine = index.as_query_engine(streaming=True)
    chat_engine = index.as_chat_engine(
        chat_mode="context",  # retrieve context for each message, keep history in the memory buffer
        memory=memory
    )
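
# Console chat loop: streams the response token by token; type END to quit.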
while True:
    input_str = input("[User]: ")
    if input_str == "END":
        break
    # res = chat_engine.query(input_str)
    res = chat_engine.stream_chat(input_str)
    bot_response = ""
    print("[Bot]: ", end="")
    for s in res.response_gen:
        bot_response += s
        print(s, end="")
    print("")