# utils/langchain_utils.py
# LangChain / LlamaIndex helper utilities for the KKMS-KSSW-HF application.
# Author: Chintan Donda

import os
import pickle

from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import PromptTemplate
from llama_index import GPTSimpleVectorIndex

# Default to '' so a missing key does not raise a TypeError: os.environ values
# must be str, and os.getenv() returns None when the variable is unset.
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', '')
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.getenv('HUGGINGFACEHUB_API_TOKEN', '')


class LANGCHAIN_UTILS:
    """LangChain/LlamaIndex helpers for summarization, QA, and index storage."""

    def __init__(self):
        pass

    def generate_prompt_template(self, prompt_type='general'):
        """Return a summarization prompt template for the given prompt_type."""
        prompt_template = ''

        if prompt_type == 'general':
            prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY IN ENGLISH:"""

        elif prompt_type == 'weather':
            prompt_template = """What would be the weather based on the below data:
{text}
"""

        return prompt_template
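
    # Illustrative use (a sketch added for documentation; values are made up):
    # the returned string is a template with a {text} placeholder that
    # PromptTemplate fills in later.
    #
    #   template = LANGCHAIN_UTILS().generate_prompt_template('weather')
    #   prompt = template.format(text='Max temp 34C, humidity 82%, wind 12 km/h')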

    def get_textual_summary(self,
                            text,
                            chain_type="stuff",
                            custom_prompt=True,
                            prompt_type='general'
                            ):
        """Summarize text with an OpenAI LLM, optionally using a custom prompt."""
        texts = [text]
        docs = [Document(page_content=t) for t in texts[:3]]

        llm = OpenAI(temperature=0)
        if custom_prompt:
            prompt_template = self.generate_prompt_template(prompt_type)
            PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
            chain = load_summarize_chain(llm, chain_type=chain_type, prompt=PROMPT)
        else:
            chain = load_summarize_chain(llm, chain_type=chain_type)

        text_summary = chain.run(docs)
        return text_summary
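
    # Example call (a sketch; assumes OPENAI_API_KEY is set and makes a real
    # OpenAI request, so output will vary between runs):
    #
    #   utils = LANGCHAIN_UTILS()
    #   summary = utils.get_textual_summary('A long article...', prompt_type='general')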

    def get_weather_forecast_summary(self,
                                     text,
                                     chain_type="stuff"
                                     ):
        """Turn raw weather data into a plain-language forecast summary."""
        text = f"""
What would be the weather based on the below data:
{text}
Give a simple response, without technical numbers, that can be explained to a human.
"""
        texts = [text]
        docs = [Document(page_content=t) for t in texts[:3]]

        llm = OpenAI(temperature=0)
        chain = load_summarize_chain(llm, chain_type=chain_type)
        text_summary = chain.run(docs)
        return text_summary
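
    # Example call (a sketch; the input is raw forecast data pasted as text,
    # the output is a plain-language description):
    #
    #   forecast = utils.get_weather_forecast_summary('{"temp_max": 34, "rain_mm": 12}')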

    def get_answer_from_para(self,
                             para,
                             question,
                             chain_type="stuff",
                             custom_prompt=True
                             ):
        """Answer a question from a paragraph via retrieval over text chunks."""
        # Prepare data (split the paragraph into small document chunks)
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_text(para)

        # Embed the chunks and index them in an in-memory Chroma store
        embeddings = OpenAIEmbeddings()
        docsearch = Chroma.from_texts(
            texts, embeddings,
            metadatas=[{"source": str(i)} for i in range(len(texts))]
        )

        # Retrieve the chunk most relevant to the question
        docs = docsearch.similarity_search(question, k=1)

        llm = OpenAI(temperature=0)

        # Create a chain for question answering
        if custom_prompt:
            prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Answer in English:"""
            PROMPT = PromptTemplate(
                template=prompt_template, input_variables=["context", "question"]
            )
            chain = load_qa_chain(llm, chain_type=chain_type, prompt=PROMPT)
        else:
            # chain = load_qa_with_sources_chain(llm, chain_type=chain_type)
            chain = load_qa_chain(llm, chain_type=chain_type)

        # chain.run(input_documents=docs, question=question)
        out_dict = chain({"input_documents": docs, "question": question}, return_only_outputs=True)
        return out_dict['output_text']
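
    # Example call (a sketch; note that a fresh Chroma index is built on every
    # call, which is fine for one-off paragraphs but wasteful for repeated
    # questions against the same text):
    #
    #   answer = utils.get_answer_from_para(
    #       para='Wheat is sown in October and harvested in April.',
    #       question='When is wheat harvested?',
    #   )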

    def store_index(self,
                    index,
                    index_type='GPTSimpleVectorIndex',
                    filepath='./output/index.json'
                    ):
        """Persist a vector index to disk in the given format."""
        if index_type == 'GPTSimpleVectorIndex':
            index.save_to_disk(filepath)
        elif index_type == 'pickle':
            with open(filepath, "wb") as f:
                pickle.dump(index, f)
        elif index_type == 'FAISS':
            index.save_local(filepath)
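
    # Example call (a sketch; for 'FAISS', save_local() treats filepath as a
    # directory, so pass a folder name rather than a .json file):
    #
    #   utils.store_index(index, index_type='FAISS', filepath='./output/faiss_index')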

    def load_index(self,
                   index_type='GPTSimpleVectorIndex',
                   filepath='./output/index.json'
                   ):
        """Load a vector index from disk; returns None for an unknown index_type."""
        index = None
        if index_type == 'GPTSimpleVectorIndex':
            index = GPTSimpleVectorIndex.load_from_disk(filepath)
        elif index_type == 'pickle':
            with open(filepath, "rb") as f:
                index = pickle.load(f)
        elif index_type == 'FAISS':
            index = FAISS.load_local(filepath, OpenAIEmbeddings())  # can we use open-source embeddings?
        return index
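
    # Example round trip (a sketch; FAISS.load_local() must be given the same
    # embedding class that built the index, here OpenAIEmbeddings):
    #
    #   index = utils.load_index(index_type='pickle', filepath='./output/index.pkl')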

    def convert_text_to_documents(self, text_list=None):
        """
        Convert a list of text strings to the llama_index Document format
        that can be fed to the GPT API to build the vector store.
        """
        # Imported locally to avoid clashing with langchain's Document class
        from llama_index import Document

        text_list = text_list or []
        documents = [Document(t) for t in text_list]
        return documents
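

# Example (a sketch): turning scraped passages into llama_index Documents,
# ready to be passed to GPTSimpleVectorIndex elsewhere in the repo.
#
#   docs = LANGCHAIN_UTILS().convert_text_to_documents(['First passage.', 'Second passage.'])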