import os
import pickle
from typing import List, Optional

# NOTE: load_qa_with_sources_chain and Chroma/FAISS are kept importable from
# this module even where the class below does not call them directly.
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS
from llama_index import GPTSimpleVectorIndex

# Re-export the API credentials into the process environment.  Assigning
# ``None`` to ``os.environ`` raises ``TypeError``, so variables that are not
# set are skipped instead of crashing at import time.
for _credential_name in ('OPENAI_API_KEY', 'HUGGINGFACEHUB_API_TOKEN'):
    _credential_value = os.getenv(_credential_name)
    if _credential_value is not None:
        os.environ[_credential_name] = _credential_value


class LANGCHAIN_UTILS:
    """Convenience wrappers around LangChain and LlamaIndex.

    Provides text summarisation, question answering over a paragraph, and
    saving/loading of vector indexes.
    """

    def __init__(self):
        # Kept from the original implementation: constructing the helper
        # writes one blank line to stdout.
        print()

    def generate_prompt_template(self, prompt_type='general'):
        """Return the prompt template string for *prompt_type*.

        Args:
            prompt_type: ``'general'`` or ``'weather'``.

        Returns:
            A template containing a ``{text}`` placeholder, or an empty
            string when *prompt_type* is not recognised.
        """
        if prompt_type == 'general':
            return (
                "Write a concise summary of the following: {text} "
                "CONCISE SUMMARY IN ENGLISH:"
            )
        if prompt_type == 'weather':
            return " What would be the weather based on the below data: {text} "
        return ''

    def get_textual_summary(self,
                            text,
                            chain_type="stuff",
                            custom_prompt=True,
                            prompt_type='general'
                            ):
        """Summarise *text* with an OpenAI LLM through a summarize chain.

        Args:
            text: The text to summarise; it is wrapped in a single Document.
            chain_type: Chain type passed to ``load_summarize_chain``.
            custom_prompt: When true, use the template returned by
                ``generate_prompt_template(prompt_type)``; otherwise use the
                chain's default prompt.
            prompt_type: Template selector used when *custom_prompt* is true.

        Returns:
            The value returned by running the chain on the document.
        """
        docs = [Document(page_content=text)]
        llm = OpenAI(temperature=0)

        if custom_prompt:
            # NOTE(review): the template is passed as ``prompt=``; confirm
            # that keyword is accepted for chain types other than "stuff".
            prompt = PromptTemplate(
                template=self.generate_prompt_template(prompt_type),
                input_variables=["text"],
            )
            chain = load_summarize_chain(llm, chain_type=chain_type, prompt=prompt)
        else:
            chain = load_summarize_chain(llm, chain_type=chain_type)

        # The original statement was truncated to ``text_summary = return ...``;
        # running the chain on the documents is the missing step.
        text_summary = chain.run(docs)
        return text_summary

    def get_weather_forecast_summary(self,
                                     text,
                                     chain_type="stuff"
                                     ):
        """Produce a plain-language weather description from raw data.

        The weather question is prepended to *text* and the combined string
        is run through a summarize chain with its default prompt.

        Args:
            text: Weather data to be interpreted.
            chain_type: Chain type passed to ``load_summarize_chain``.

        Returns:
            The value returned by running the chain on the document.
        """
        # Separate local name so the ``text`` parameter is not shadowed.
        request_text = (
            " What would be the weather based on the below data: "
            f"{text} "
            "Give simple response without technical numbers which can be "
            "explained to human. \n"
        )
        docs = [Document(page_content=request_text)]

        llm = OpenAI(temperature=0)
        chain = load_summarize_chain(llm, chain_type=chain_type)

        # Same truncated statement as in get_textual_summary; restored.
        text_summary = chain.run(docs)
        return text_summary

    def get_answer_from_para(self,
                             para,
                             question,
                             chain_type="stuff",
                             custom_prompt=True
                             ):
        """Answer *question* using the most relevant chunk of *para*.

        Args:
            para: Source paragraph; it is split into chunks before indexing.
            question: The question to answer.
            chain_type: Chain type passed to ``load_qa_chain``.
            custom_prompt: When true, use the built-in "answer in English"
                prompt; otherwise use the chain's default prompt.

        Returns:
            The ``'output_text'`` entry of the chain's output.
        """
        # Prepare data (split paragraph into chunks of small documents).
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_text(para)

        # Index the chunks, tagging each one with its position as "source".
        embeddings = OpenAIEmbeddings()
        docsearch = Chroma.from_texts(
            texts,
            embeddings,
            metadatas=[{"source": str(i)} for i in range(len(texts))],
        )

        # Keep only the single chunk most similar to the question.
        docs = docsearch.similarity_search(question, k=1)

        llm = OpenAI(temperature=0)

        # Create a chain for question answering.
        if custom_prompt:
            prompt_template = (
                "Use the following pieces of context to answer the question "
                "at the end. If you don't know the answer, just say that you "
                "don't know, don't try to make up an answer. \n"
                "{context} Question: {question} Answer in English:"
            )
            prompt = PromptTemplate(
                template=prompt_template,
                input_variables=["context", "question"],
            )
            chain = load_qa_chain(llm, chain_type=chain_type, prompt=prompt)
        else:
            chain = load_qa_chain(llm, chain_type=chain_type)

        out_dict = chain(
            {"input_documents": docs, "question": question},
            return_only_outputs=True,
        )
        return out_dict['output_text']

    def store_index(self,
                    index,
                    index_type='GPTSimpleVectorIndex',
                    filepath='./output/index.json'
                    ):
        """Persist *index* to *filepath* in the format named by *index_type*.

        Args:
            index: The index object to save.
            index_type: ``'GPTSimpleVectorIndex'`` (``index.save_to_disk``),
                ``'pickle'`` (``pickle.dump``) or ``'FAISS'``
                (``index.save_local``).
            filepath: Destination path.

        Raises:
            ValueError: If *index_type* is not one of the supported values
                (previously the call returned without saving anything).
        """
        if index_type == 'GPTSimpleVectorIndex':
            index.save_to_disk(filepath)
        elif index_type == 'pickle':
            with open(filepath, "wb") as f:
                pickle.dump(index, f)
        elif index_type == 'FAISS':
            index.save_local(filepath)
        else:
            raise ValueError(f"Unsupported index_type: {index_type!r}")

    def load_index(self,
                   index_type='GPTSimpleVectorIndex',
                   filepath='./output/index.json'
                   ):
        """Load an index previously written by ``store_index``.

        Args:
            index_type: ``'GPTSimpleVectorIndex'``, ``'pickle'`` or
                ``'FAISS'``.
            filepath: Path the index was saved to.

        Returns:
            The loaded index object.

        Raises:
            ValueError: If *index_type* is not one of the supported values
                (previously this surfaced as an ``UnboundLocalError``).
        """
        if index_type == 'GPTSimpleVectorIndex':
            return GPTSimpleVectorIndex.load_from_disk(filepath)
        if index_type == 'pickle':
            # SECURITY: unpickling executes arbitrary code; only load files
            # that this application wrote itself.
            with open(filepath, "rb") as f:
                return pickle.load(f)
        if index_type == 'FAISS':
            # TODO: can we use open-source embeddings?
            return FAISS.load_local(filepath, OpenAIEmbeddings())
        raise ValueError(f"Unsupported index_type: {index_type!r}")

    def convert_text_to_documents(self, text_list: Optional[List[str]] = None):
        """
        Converts the list of text data to Documents format that can be fed
        to the GPT API to build the vector store.

        Args:
            text_list: Texts to wrap; ``None`` is treated as an empty list.

        Returns:
            A list with one ``llama_index.Document`` per input text.
        """
        # Imported locally because the module-level ``Document`` name is the
        # LangChain class; this method needs the LlamaIndex one.
        from llama_index import Document

        if text_list is None:
            text_list = []
        documents = [Document(t) for t in text_list]
        return documents