import hashlib
import math
import os
import re
from io import StringIO

import numpy as np
import pandas as pd
import PyPDF2
import streamlit as st
import torch
from dotenv import load_dotenv
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms import HuggingFaceHub
from pinecone import ServerlessSpec
from sentence_transformers import SentenceTransformer
from streamlit_chat import message
from tqdm import tqdm
from transformers import pipeline
# Load environment variables from .env file. This has to happen before any
# credential is read from os.environ below.
load_dotenv()

# Run the embedding model on the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device != 'cuda':
    st.markdown(f"Note: Using {device}. Expected slow responses compare to CUDA-enabled GPU. Please be patient thanks")
model = SentenceTransformer("all-MiniLM-L6-v2", device=device)

# Creating a Index(Pinecone Vector Database)
import os
from pinecone.grpc import PineconeGRPC

pc = PineconeGRPC(api_key=os.environ.get("PINECONE_API_KEY"))
def connect_pinecone():
    """Create and return a Pinecone gRPC client.

    The credentials are read from the process environment, the same way the
    module-level ``pc`` client obtains its API key.

    Returns:
        A new ``PineconeGRPC`` client.
    """
    api_key = os.environ.get("PINECONE_API_KEY")
    # NOTE(review): the environment-variable name "PINECONE_ENV" is assumed
    # from the original identifier; confirm it matches the deployment config.
    environment = os.environ.get("PINECONE_ENV")
    return PineconeGRPC(api_key=api_key, environment=environment)
def get_pinecone_semantic_index(pinecone):
    """Return a handle to the semantic-search index, creating it if missing.

    Args:
        pinecone: Pinecone client exposing ``list_indexes()``,
            ``create_index()`` and ``Index()``.

    Returns:
        Whatever ``pinecone.Index`` returns for the index name.
    """
    index_name = "sematic-search-index"
    # Only create the index if it does not exist yet.
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(
            name=index_name,
            # The index dimension must equal the size of the vectors produced
            # by the embedding model that feeds it.
            dimension=model.get_sentence_embedding_dimension(),
            # NOTE(review): cosine similarity is assumed here; confirm it is
            # the metric this index is meant to use.
            metric="cosine",
            spec=ServerlessSpec(cloud='aws', region='us-east-1'),
        )
    # Now connect to the index.
    return pinecone.Index(index_name)
# Prompt texts are kept flush-left at module level so that no source
# indentation leaks into what is sent to the model.
# NOTE(review): the ```{context}``` line was reconstructed from the
# "delimited by triple backquotes" instruction; confirm the exact wording.
SUMMARY_PROMPT_TEMPLATE = """
write a concise summary of the following text delimited by triple backquotes.
return your response in bullet points which convers the key points of the text.
```{context}```
"""

# NOTE(review): the {context} placeholder line was reconstructed; the template
# is formatted with both ``context`` and ``question``, so it needs both.
GENERATION_PROMPT_TEMPLATE = """
Answer the question only based on the below context:
- You're a Research AI expert in the explaining and reading the research papers.
- Questions with out-of-context replay with The question is out of context.
- Always try to provide Keep it simple answers in nice format without incomplete sentence.
- Give the answer atleast 5 seperate lines addition to the title info.
- Only If question is relevent to context provide Doc Title: <title> Paragraph: <Paragraph> Page No: <pagenumber>

{context}

Answer the question based on the above context: {question}
"""


def prompt_engineer(text, longtext, query):
    """Ask the hosted LLM to answer ``query`` using ``text`` as context.

    Args:
        text: Context inserted into the question-answering prompt.
        longtext: Full retrieved text; only used to build the summary prompt.
        query: The user's question.

    Returns:
        The part of the model output that follows the final
        "Answer the question based on the above context: <query>" line, or
        the whole output when that line is absent. Returns ``""`` when the
        model call fails; the error is shown with ``st.error``.
    """
    summary_prompt_template = ChatPromptTemplate.from_template(SUMMARY_PROMPT_TEMPLATE)
    # NOTE(review): the summary prompt is built but nothing below consumes it.
    summary_prompt = summary_prompt_template.format(context=longtext, question="generate summary of text?")

    prompt_template = ChatPromptTemplate.from_template(GENERATION_PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=text, question=query)

    result = ""
    try:
        llm = HuggingFaceHub(
            repo_id="meta-llama/Meta-Llama-3-8B-Instruct", model_kwargs={"temperature": 0.1, "task":"text-generation"}
        )
        st.write("GEN llm connection started..")
        response_text = llm.invoke(prompt)
        # Keep only what follows the last question line of the prompt
        # (presumably because the model output echoes the prompt). A plain
        # string split needs no regex escaping of the query.
        marker = f"Answer the question based on the above context: {query}\n"
        result = response_text.rpartition(marker)[2]
        st.write("reponse generated see chat window 👉🏻")
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the app.
        st.error(f"Error invoke: {e}")
    return result
def chat_actions():
    """Handle a submitted chat message.

    Registered as the ``on_submit`` callback of the chat input. It stores the
    user's message in ``st.session_state["chat_history"]``, retrieves the five
    closest chunks from the Pinecone index, asks the LLM for an answer based
    on them, and stores that answer as the assistant's reply.
    """
    pinecone = connect_pinecone()
    index = get_pinecone_semantic_index(pinecone)

    query = st.session_state["chat_input"]
    st.session_state["chat_history"].append(
        {"role": "user", "content": query},
    )

    # Create the query vector and search the vector database. The vector is
    # passed by keyword because some Pinecone clients reject positional
    # arguments to query().
    query_vector = model.encode(query).tolist()
    result = index.query(vector=query_vector, top_k=5, include_metadata=True)

    matches = result['matches']
    data = [
        [f"{rank}⭐", match['score'], match['metadata']['text']]
        for rank, match in enumerate(matches, start=1)
    ]
    consolidated_text = "".join(match['metadata']['text'] for match in matches)
    resdf = pd.DataFrame(data, columns=['TopRank', 'Score', 'Text'])

    with st.sidebar:
        st.markdown("*:red[semantic search results]* with **:green[Retrieval Augmented Generation]** ***(RAG)***.")
        # NOTE(review): the table was built but never displayed; showing it
        # under the heading is assumed to be the original intent.
        st.dataframe(resdf)
        mbsize = round(len(consolidated_text.encode("utf-8")) / math.pow(1024, 2), 2)
        st.write(f"Text length of {len(consolidated_text)} characters with {mbsize}MB size")
        # Only the first 1024 characters are used as answer context.
        response = prompt_engineer(consolidated_text[:1024], consolidated_text, query)

    # Store the reply exactly once, not once per retrieved match.
    st.session_state["chat_history"].append(
        {
            "role": "assistant",
            "content": f"{response}",
        },
    )
# Make sure the conversation store exists before anything appends to it.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

st.chat_input("show me the contents of ML paper published on xxx with article no. xx?", on_submit=chat_actions, key="chat_input")

# Replay the stored conversation, one chat bubble per message.
for i in st.session_state["chat_history"]:
    with st.chat_message(name=i["role"]):
        st.write(i["content"])
def print_out(pages):
    """Write the extracted text of every page to the Streamlit app.

    Args:
        pages: Iterable of page objects exposing ``extract_text()``.
            Pages are numbered from 0 in the output.
    """
    for page_no, page in enumerate(pages):
        text = page.extract_text().strip()
        st.write(f"Page {page_no} : {text}")
def combine_text(pages):
    """Concatenate the stripped text of all pages and report its size.

    Args:
        pages: Iterable of page objects exposing ``extract_text()``.

    Returns:
        The text of all pages joined without a separator.
    """
    # Join once instead of growing a string inside the loop.
    concatenates_text = "".join(page.extract_text().strip() for page in tqdm(pages))
    # Size of the UTF-8 encoded text in megabytes, rounded to two decimals.
    mbsize = round(len(concatenates_text.encode("utf-8")) / math.pow(1024, 2), 2)
    st.write(f"There are {len(concatenates_text)} characters in the pdf with {mbsize}MB size")
    return concatenates_text
def split_into_chunks(text, chunk_size):
    """Split ``text`` into consecutive pieces of at most ``chunk_size`` characters.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each piece; must be positive.

    Returns:
        The pieces in order. Every piece except possibly the last has exactly
        ``chunk_size`` characters. Empty ``text`` gives an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop all text.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
def create_embeddings():
    """Embed every uploaded PDF and store the vectors in the Pinecone index.

    Registered as the ``on_change`` callback of the file uploader. For each
    file in ``st.session_state["uploaded_files"]`` the text is extracted,
    split into chunks, embedded in small batches and upserted together with
    the chunk text as metadata. Progress messages go to the sidebar.
    """
    # The maximum metadata size per vector is 40KB ~ 40000 bytes and each text
    # character is 1 to 2 bytes, so a chunk size of 10000 stays well inside it.
    chunk_size = 10000
    batch_size = 2

    uploaded_files = st.session_state["uploaded_files"]
    if not uploaded_files:
        # Nothing to index, so do not connect or report success.
        return

    with st.sidebar:
        # Connect to the Pinecone index once for all files.
        pinecone = connect_pinecone()
        index = get_pinecone_semantic_index(pinecone)

        for uploaded_file in uploaded_files:
            # Read the contents of the file.
            reader = PyPDF2.PdfReader(uploaded_file)
            inputtext = combine_text(reader.pages)
            chunks = split_into_chunks(inputtext, chunk_size)

            # Prefix ids with a short digest of the file name so chunks of
            # different files do not overwrite each other, while re-uploading
            # the same file replaces its own chunks.
            file_key = hashlib.sha1(uploaded_file.name.encode("utf-8")).hexdigest()[:12]

            for start in tqdm(range(0, len(chunks), batch_size)):
                batch = chunks[start:start + batch_size]
                ids = [f"{file_key}-{start + offset}" for offset in range(len(batch))]
                metadata = [{"text": chunk} for chunk in batch]
                # One embedding per chunk in the batch.
                xc = model.encode(batch)
                # Plain float lists, materialised into a list for the client.
                records = [
                    (vec_id, vector.tolist(), meta)
                    for vec_id, vector, meta in zip(ids, xc, metadata)
                ]
                index.upsert(vectors=records)

        st.write("created vector embeddings!")
# Usage notes shown next to the uploader. The text is kept flush-left at
# module level so that no source indentation becomes part of the markdown.
UPLOAD_INSTRUCTIONS = """
***:red[Follow this steps]***
- upload pdf file to create embeddings using model on your own docs
- wait see success message on embeddings creation
- It Takes couple of mins after upload the pdf
- Now Chat with your documents with help of this RAG system
- It Generate Promted reponses on the upload pdf
- Provides summarized results and QA's using GPT models
- This system already trained on some wikipedia datasets too
"""

with st.sidebar:
    st.markdown(UPLOAD_INSTRUCTIONS)
    # Embeddings are created by the on_change callback whenever the set of
    # uploaded files changes.
    uploaded_files = st.file_uploader('Choose your .pdf file', type="pdf", accept_multiple_files=True, key="uploaded_files", on_change=create_embeddings)

# NOTE: the leftover CSV-reading example (UTF-8 decoding of the upload and
# pd.read_csv) was removed. It referenced an undefined ``uploaded_file``,
# contained an incomplete statement, and cannot work on the PDF-only uploads
# accepted above.