File size: 4,702 Bytes
a447435
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import PromptTemplate
from llama_index import GPTSimpleVectorIndex
from langchain.vectorstores import FAISS

import pickle
import os
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.getenv('HUGGINGFACEHUB_API_TOKEN')



class LANGCHAIN_UTILS:
	"""Utility wrappers around LangChain / LlamaIndex for text summarization,
	question answering over a paragraph, and vector-index persistence.

	NOTE(review): the non-PEP8 class name is kept unchanged because external
	callers reference it.
	"""

	def __init__(self):
		# Stateless helper class; nothing to initialize.
		# (The original printed a blank line here, which served no purpose.)
		pass

	def generate_prompt_template(self, prompt_type='general'):
		"""Return a prompt template string containing a ``{text}`` placeholder.

		Args:
			prompt_type: 'general' for a concise-summary prompt, 'weather'
				for a weather-interpretation prompt. Any other value yields
				an empty string (preserved behavior).

		Returns:
			str: the template, or '' for an unknown ``prompt_type``.
		"""
		if prompt_type == 'general':
			# Leading whitespace inside the literal is intentional and kept
			# byte-identical to the original: it is part of the prompt text.
			return """Write a concise summary of the following:

			{text}

			CONCISE SUMMARY IN ENGLISH:"""

		if prompt_type == 'weather':
			return """
				What would be the weather based on the below data:
				{text}
			"""

		return ''

	def get_textual_summary(self,
		text,
		chain_type="stuff",
		custom_prompt=True,
		prompt_type='general'
	):
		"""Summarize ``text`` with an OpenAI LLM via a LangChain summarize chain.

		Args:
			text: raw text to summarize.
			chain_type: LangChain summarize chain type (e.g. "stuff").
			custom_prompt: when True, use ``generate_prompt_template(prompt_type)``
				as the chain prompt; otherwise use the chain's default prompt.
			prompt_type: forwarded to ``generate_prompt_template``.

		Returns:
			str: the summary produced by the chain.
		"""
		# Single input document. The original built a one-element list and
		# sliced it [:3], which is equivalent to using the element directly.
		docs = [Document(page_content=text)]

		llm = OpenAI(temperature=0)  # temperature=0 for deterministic output
		if custom_prompt:
			prompt_template = self.generate_prompt_template(prompt_type)
			prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
			chain = load_summarize_chain(llm, chain_type=chain_type, prompt=prompt)
		else:
			chain = load_summarize_chain(llm, chain_type=chain_type)

		return chain.run(docs)

	def get_weather_forecast_summary(self,
		text,
		chain_type="stuff"
	):
		"""Ask the LLM for a human-readable summary of raw weather data.

		Args:
			text: raw weather data (readings/numbers) to interpret.
			chain_type: LangChain summarize chain type (e.g. "stuff").

		Returns:
			str: a plain-language weather description.
		"""
		# The wrapper text below is a runtime prompt and is kept byte-identical.
		prompt_text = f"""
			What would be the weather based on the below data:
			{text}
			
			Give simple response without technical numbers which can be explained to human.
		"""
		docs = [Document(page_content=prompt_text)]

		llm = OpenAI(temperature=0)
		chain = load_summarize_chain(llm, chain_type=chain_type)
		return chain.run(docs)

	def get_answer_from_para(self,
		para,
		question,
		chain_type="stuff",
		custom_prompt=True
	):
		"""Answer ``question`` using only the content of ``para``.

		The paragraph is split into chunks, embedded, indexed in an in-memory
		Chroma store, and the most similar chunk is fed to a QA chain.

		Args:
			para: source paragraph(s) to answer from.
			question: the user question.
			chain_type: LangChain QA chain type (e.g. "stuff").
			custom_prompt: when True, use an explicit QA prompt instructing the
				model to admit ignorance rather than invent an answer.

		Returns:
			str: the chain's answer text.
		"""
		# Split the paragraph into ~1000-character chunks with no overlap.
		text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
		texts = text_splitter.split_text(para)

		# Embed the chunks and index them for similarity search.
		embeddings = OpenAIEmbeddings()
		docsearch = Chroma.from_texts(
			texts, embeddings,
			metadatas=[{"source": str(i)} for i in range(len(texts))]
		)

		# Retrieve only the single most relevant chunk (k=1).
		docs = docsearch.similarity_search(question, k=1)

		llm = OpenAI(temperature=0)
		if custom_prompt:
			prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

			{context}

			Question: {question}
			Answer in English:"""

			prompt = PromptTemplate(
				template=prompt_template, input_variables=["context", "question"]
			)
			chain = load_qa_chain(llm, chain_type=chain_type, prompt=prompt)
		else:
			chain = load_qa_chain(llm, chain_type=chain_type)

		out_dict = chain({"input_documents": docs, "question": question}, return_only_outputs=True)
		return out_dict['output_text']

	def store_index(self,
		index,
		index_type='GPTSimpleVectorIndex',
		filepath='./output/index.json'
	):
		"""Persist a vector index to disk.

		Args:
			index: the index object to save.
			index_type: one of 'GPTSimpleVectorIndex', 'pickle', 'FAISS'.
			filepath: destination path (file for JSON/pickle; FAISS uses it
				as its save target directory).

		Raises:
			ValueError: for an unsupported ``index_type`` (previously a
				silent no-op).
		"""
		if index_type == 'GPTSimpleVectorIndex':
			index.save_to_disk(filepath)
		elif index_type == 'pickle':
			with open(filepath, "wb") as f:
				pickle.dump(index, f)
		elif index_type == 'FAISS':
			index.save_local(filepath)
		else:
			raise ValueError(f"Unsupported index_type: {index_type!r}")

	def load_index(self,
		index_type='GPTSimpleVectorIndex',
		filepath='./output/index.json'
	):
		"""Load a previously stored vector index from disk.

		Args:
			index_type: one of 'GPTSimpleVectorIndex', 'pickle', 'FAISS'.
			filepath: path the index was saved to.

		Returns:
			The loaded index object.

		Raises:
			ValueError: for an unsupported ``index_type`` (previously this
				crashed with UnboundLocalError at the return statement).
		"""
		if index_type == 'GPTSimpleVectorIndex':
			return GPTSimpleVectorIndex.load_from_disk(filepath)

		if index_type == 'pickle':
			# SECURITY: pickle.load executes arbitrary code during
			# deserialization; only load files this application wrote itself,
			# never untrusted input.
			with open(filepath, "rb") as f:
				return pickle.load(f)

		if index_type == 'FAISS':
			# TODO(review): original note asked whether open-source embeddings
			# could be used here instead of OpenAIEmbeddings.
			return FAISS.load_local(filepath, OpenAIEmbeddings())

		raise ValueError(f"Unsupported index_type: {index_type!r}")

	def convert_text_to_documents(self, text_list=None):
		"""Convert a list of strings into llama_index ``Document`` objects.

		Args:
			text_list: strings to wrap; None (replacing the original mutable
				default ``[]``) yields an empty list.

		Returns:
			list: one llama_index Document per input string.
		"""
		if text_list is None:
			text_list = []
		# Local import kept from the original: llama_index's Document would
		# otherwise clash with langchain's Document imported at module level.
		from llama_index import Document
		return [Document(t) for t in text_list]