# utils/langchain_utils.py
# LangChain / LlamaIndex helper utilities for the KKMS-KSSW-HF application.
# Author: Chintan Donda

import os
import pickle

from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import PromptTemplate
from llama_index import GPTSimpleVectorIndex

# Default to '' so a missing key does not raise a TypeError: os.environ values
# must be str, and os.getenv() returns None when the variable is unset.
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', '')
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.getenv('HUGGINGFACEHUB_API_TOKEN', '')


class LANGCHAIN_UTILS:
    """LangChain/LlamaIndex helpers for summarization, QA, and index storage."""

    def __init__(self):
        pass

    def generate_prompt_template(self, prompt_type='general'):
        """Return a summarization prompt template for the given prompt_type."""
        prompt_template = ''

        if prompt_type == 'general':
            prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY IN ENGLISH:"""

        elif prompt_type == 'weather':
            prompt_template = """What would be the weather based on the below data:
{text}
"""

        return prompt_template
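
    # Illustrative use (a sketch added for documentation; values are made up):
    # the returned string is a template with a {text} placeholder that
    # PromptTemplate fills in later.
    #
    #   template = LANGCHAIN_UTILS().generate_prompt_template('weather')
    #   prompt = template.format(text='Max temp 34C, humidity 82%, wind 12 km/h')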

    def get_textual_summary(self,
                            text,
                            chain_type="stuff",
                            custom_prompt=True,
                            prompt_type='general'
                            ):
        """Summarize text with an OpenAI LLM, optionally using a custom prompt."""
        texts = [text]
        docs = [Document(page_content=t) for t in texts[:3]]

        llm = OpenAI(temperature=0)
        if custom_prompt:
            prompt_template = self.generate_prompt_template(prompt_type)
            PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
            chain = load_summarize_chain(llm, chain_type=chain_type, prompt=PROMPT)
        else:
            chain = load_summarize_chain(llm, chain_type=chain_type)

        text_summary = chain.run(docs)
        return text_summary
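
    # Example call (a sketch; assumes OPENAI_API_KEY is set and makes a real
    # OpenAI request, so output will vary between runs):
    #
    #   utils = LANGCHAIN_UTILS()
    #   summary = utils.get_textual_summary('A long article...', prompt_type='general')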

    def get_weather_forecast_summary(self,
                                     text,
                                     chain_type="stuff"
                                     ):
        """Turn raw weather data into a plain-language forecast summary."""
        text = f"""
What would be the weather based on the below data:
{text}
Give a simple response, without technical numbers, that can be explained to a human.
"""
        texts = [text]
        docs = [Document(page_content=t) for t in texts[:3]]

        llm = OpenAI(temperature=0)
        chain = load_summarize_chain(llm, chain_type=chain_type)
        text_summary = chain.run(docs)
        return text_summary
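
    # Example call (a sketch; the input is raw forecast data pasted as text,
    # the output is a plain-language description):
    #
    #   forecast = utils.get_weather_forecast_summary('{"temp_max": 34, "rain_mm": 12}')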

    def get_answer_from_para(self,
                             para,
                             question,
                             chain_type="stuff",
                             custom_prompt=True
                             ):
        """Answer a question from a paragraph via retrieval over text chunks."""
        # Prepare data (split the paragraph into small document chunks)
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_text(para)

        # Embed the chunks and index them in an in-memory Chroma store
        embeddings = OpenAIEmbeddings()
        docsearch = Chroma.from_texts(
            texts, embeddings,
            metadatas=[{"source": str(i)} for i in range(len(texts))]
        )

        # Retrieve the chunk most relevant to the question
        docs = docsearch.similarity_search(question, k=1)

        llm = OpenAI(temperature=0)

        # Create a chain for question answering
        if custom_prompt:
            prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Answer in English:"""
            PROMPT = PromptTemplate(
                template=prompt_template, input_variables=["context", "question"]
            )
            chain = load_qa_chain(llm, chain_type=chain_type, prompt=PROMPT)
        else:
            # chain = load_qa_with_sources_chain(llm, chain_type=chain_type)
            chain = load_qa_chain(llm, chain_type=chain_type)

        # chain.run(input_documents=docs, question=question)
        out_dict = chain({"input_documents": docs, "question": question}, return_only_outputs=True)
        return out_dict['output_text']
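
    # Example call (a sketch; note that a fresh Chroma index is built on every
    # call, which is fine for one-off paragraphs but wasteful for repeated
    # questions against the same text):
    #
    #   answer = utils.get_answer_from_para(
    #       para='Wheat is sown in October and harvested in April.',
    #       question='When is wheat harvested?',
    #   )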

    def store_index(self,
                    index,
                    index_type='GPTSimpleVectorIndex',
                    filepath='./output/index.json'
                    ):
        """Persist a vector index to disk in the given format."""
        if index_type == 'GPTSimpleVectorIndex':
            index.save_to_disk(filepath)
        elif index_type == 'pickle':
            with open(filepath, "wb") as f:
                pickle.dump(index, f)
        elif index_type == 'FAISS':
            index.save_local(filepath)
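
    # Example call (a sketch; for 'FAISS', save_local() treats filepath as a
    # directory, so pass a folder name rather than a .json file):
    #
    #   utils.store_index(index, index_type='FAISS', filepath='./output/faiss_index')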

    def load_index(self,
                   index_type='GPTSimpleVectorIndex',
                   filepath='./output/index.json'
                   ):
        """Load a vector index from disk; returns None for an unknown index_type."""
        index = None
        if index_type == 'GPTSimpleVectorIndex':
            index = GPTSimpleVectorIndex.load_from_disk(filepath)
        elif index_type == 'pickle':
            with open(filepath, "rb") as f:
                index = pickle.load(f)
        elif index_type == 'FAISS':
            index = FAISS.load_local(filepath, OpenAIEmbeddings())  # can we use open-source embeddings?
        return index
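
    # Example round trip (a sketch; FAISS.load_local() must be given the same
    # embedding class that built the index, here OpenAIEmbeddings):
    #
    #   index = utils.load_index(index_type='pickle', filepath='./output/index.pkl')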

    def convert_text_to_documents(self, text_list=None):
        """
        Convert a list of text strings to the llama_index Document format
        that can be fed to the GPT API to build the vector store.
        """
        # Imported locally to avoid clashing with langchain's Document class
        from llama_index import Document

        text_list = text_list or []
        documents = [Document(t) for t in text_list]
        return documents
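

# Example (a sketch): turning scraped passages into llama_index Documents,
# ready to be passed to GPTSimpleVectorIndex elsewhere in the repo.
#
#   docs = LANGCHAIN_UTILS().convert_text_to_documents(['First passage.', 'Second passage.'])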