Spaces:
Runtime error
Runtime error
File size: 4,702 Bytes
a447435 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import PromptTemplate
from llama_index import GPTSimpleVectorIndex
from langchain.vectorstores import FAISS
import pickle
import os
import warnings

# Re-export the API credentials into this process' environment.
# ``os.getenv`` returns ``None`` for an unset variable and assigning ``None``
# into ``os.environ`` raises ``TypeError``, which would make this module
# impossible to import. Copy only the values that exist and warn about the
# rest, so a missing key surfaces later at the call that actually needs it.
for _env_key in ('OPENAI_API_KEY', 'HUGGINGFACEHUB_API_TOKEN'):
    _env_value = os.getenv(_env_key)
    if _env_value is None:
        warnings.warn(
            f"Environment variable {_env_key} is not set.",
            RuntimeWarning,
        )
    else:
        os.environ[_env_key] = _env_value
# Do not leak the loop temporaries into the module namespace.
del _env_key, _env_value
class LANGCHAIN_UTILS:
    """Helpers built on LangChain and LlamaIndex.

    Groups three kinds of operations: text summarisation, question answering
    over a paragraph, and saving/loading of indexes. Instances hold no state;
    every method builds the LLM and chain objects it needs on each call.
    """

    # Summarisation prompt templates, keyed by the ``prompt_type`` accepted by
    # ``generate_prompt_template``. Each contains a ``{text}`` placeholder.
    _PROMPT_TEMPLATES = {
        'general': (
            "Write a concise summary of the following:\n"
            "{text}\n"
            "CONCISE SUMMARY IN ENGLISH:"
        ),
        'weather': (
            "\n"
            "What would be the weather based on the below data:\n"
            "{text}\n"
        ),
    }

    # Prompt used by ``get_answer_from_para`` when ``custom_prompt`` is true.
    _QA_PROMPT_TEMPLATE = (
        "Use the following pieces of context to answer the question at the end. "
        "If you don't know the answer, just say that you don't know, "
        "don't try to make up an answer.\n"
        "{context}\n"
        "Question: {question}\n"
        "Answer in English:"
    )

    # Values of ``index_type`` understood by ``store_index`` / ``load_index``.
    _INDEX_TYPES = ('GPTSimpleVectorIndex', 'pickle', 'FAISS')

    def __init__(self):
        """Create a helper instance; there is no state to initialise."""

    def generate_prompt_template(self, prompt_type='general'):
        """Return the summarisation prompt template for ``prompt_type``.

        Args:
            prompt_type: ``'general'`` or ``'weather'``.

        Returns:
            The template string containing a ``{text}`` placeholder, or an
            empty string when ``prompt_type`` is not recognised.
        """
        return self._PROMPT_TEMPLATES.get(prompt_type, '')

    def _summarize(self, text, chain_type, prompt=None):
        """Run a summarize chain over ``text`` wrapped as a single Document."""
        # Only forward ``prompt`` when one was supplied, so the chain keeps
        # its own default prompt otherwise.
        extra_kwargs = {} if prompt is None else {"prompt": prompt}
        chain = load_summarize_chain(
            OpenAI(temperature=0), chain_type=chain_type, **extra_kwargs
        )
        return chain.run([Document(page_content=text)])

    def get_textual_summary(self,
                            text,
                            chain_type="stuff",
                            custom_prompt=True,
                            prompt_type='general'
                            ):
        """Summarise ``text`` with an OpenAI LLM (temperature 0).

        Args:
            text: The text to summarise.
            chain_type: Passed through to ``load_summarize_chain``.
            custom_prompt: When true, use the template selected by
                ``prompt_type``; otherwise use the chain's default prompt.
            prompt_type: Template name for ``generate_prompt_template``;
                ignored when ``custom_prompt`` is false.

        Returns:
            Whatever ``chain.run`` returns for the single input document.
        """
        # NOTE(review): the ``prompt`` keyword is presumably not accepted by
        # every ``chain_type`` -- confirm before using custom prompts with
        # chain types other than "stuff".
        prompt = None
        if custom_prompt:
            prompt = PromptTemplate(
                template=self.generate_prompt_template(prompt_type),
                input_variables=["text"],
            )
        return self._summarize(text, chain_type, prompt)

    def get_weather_forecast_summary(self,
                                     text,
                                     chain_type="stuff"
                                     ):
        """Ask the LLM for a plain-language weather description of ``text``.

        The weather question and the data are combined into one document,
        which is then run through the default summarize chain.

        Args:
            text: Weather data to describe.
            chain_type: Passed through to ``load_summarize_chain``.

        Returns:
            Whatever ``chain.run`` returns for the single input document.
        """
        weather_request = (
            "\n"
            "What would be the weather based on the below data:\n"
            f"{text}\n"
            "Give simple response without technical numbers which can be explained to human.\n"
        )
        return self._summarize(weather_request, chain_type)

    def get_answer_from_para(self,
                             para,
                             question,
                             chain_type="stuff",
                             custom_prompt=True
                             ):
        """Answer ``question`` using the most relevant chunk of ``para``.

        Args:
            para: Source paragraph; it is split into chunks of at most 1000
                characters with no overlap.
            question: The question to answer.
            chain_type: Passed through to ``load_qa_chain``.
            custom_prompt: When true, use the class QA prompt; otherwise use
                the chain's default prompt.

        Returns:
            The ``'output_text'`` entry of the chain's output.
        """
        # Split the paragraph into small documents.
        splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        chunks = splitter.split_text(para)

        # Index the chunks, tagging each one with its position as "source".
        docsearch = Chroma.from_texts(
            chunks,
            OpenAIEmbeddings(),
            metadatas=[{"source": str(i)} for i in range(len(chunks))],
        )

        # Keep only the single chunk most similar to the question.
        relevant_docs = docsearch.similarity_search(question, k=1)

        llm = OpenAI(temperature=0)
        if custom_prompt:
            qa_prompt = PromptTemplate(
                template=self._QA_PROMPT_TEMPLATE,
                input_variables=["context", "question"],
            )
            chain = load_qa_chain(llm, chain_type=chain_type, prompt=qa_prompt)
        else:
            chain = load_qa_chain(llm, chain_type=chain_type)

        result = chain(
            {"input_documents": relevant_docs, "question": question},
            return_only_outputs=True,
        )
        return result['output_text']

    def _check_index_type(self, index_type):
        """Raise ``ValueError`` unless ``index_type`` is a supported value."""
        if index_type not in self._INDEX_TYPES:
            raise ValueError(
                f"Unsupported index_type {index_type!r}; "
                f"expected one of {self._INDEX_TYPES}"
            )

    def store_index(self,
                    index,
                    index_type='GPTSimpleVectorIndex',
                    filepath='./output/index.json'
                    ):
        """Persist ``index`` to ``filepath``.

        Args:
            index: The object to save. It must provide ``save_to_disk`` for
                ``'GPTSimpleVectorIndex'`` or ``save_local`` for ``'FAISS'``;
                for ``'pickle'`` it is pickled as is.
            index_type: ``'GPTSimpleVectorIndex'``, ``'pickle'`` or ``'FAISS'``.
            filepath: Destination path handed to the chosen save routine.

        Raises:
            ValueError: If ``index_type`` is not supported (previously such a
                call returned without saving anything).
        """
        self._check_index_type(index_type)

        # Make sure the destination's parent directory exists; the default
        # path lives under ./output, which may not have been created yet.
        parent_dir = os.path.dirname(filepath)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        if index_type == 'GPTSimpleVectorIndex':
            index.save_to_disk(filepath)
        elif index_type == 'pickle':
            with open(filepath, "wb") as f:
                pickle.dump(index, f)
        else:  # 'FAISS'
            index.save_local(filepath)

    def load_index(self,
                   index_type='GPTSimpleVectorIndex',
                   filepath='./output/index.json'
                   ):
        """Load an index previously written by ``store_index``.

        Args:
            index_type: ``'GPTSimpleVectorIndex'``, ``'pickle'`` or ``'FAISS'``.
            filepath: Location of the stored index.

        Returns:
            The loaded index object.

        Raises:
            ValueError: If ``index_type`` is not supported (previously this
                surfaced as an ``UnboundLocalError``).
        """
        self._check_index_type(index_type)

        if index_type == 'GPTSimpleVectorIndex':
            return GPTSimpleVectorIndex.load_from_disk(filepath)
        if index_type == 'pickle':
            # SECURITY: unpickling executes arbitrary code embedded in the
            # file; only load files this application wrote itself.
            with open(filepath, "rb") as f:
                return pickle.load(f)
        # 'FAISS' -- TODO: can we use open-source embeddings?
        return FAISS.load_local(filepath, OpenAIEmbeddings())

    def convert_text_to_documents(self, text_list=None):
        """Wrap each text in a ``llama_index`` ``Document``.

        Args:
            text_list: Iterable of texts; ``None`` (the default) is treated
                as an empty list.

        Returns:
            A list with one ``llama_index`` ``Document`` per input text.
        """
        # Imported locally under an alias so it does not clash with the
        # LangChain ``Document`` imported at module level.
        from llama_index import Document as LlamaDocument

        return [LlamaDocument(t) for t in (text_list or [])]
|