import os
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from groq import Groq
from PyPDF2 import PdfReader
from docx import Document
from pptx import Presentation
# CSS styling for a professional look with black background.
# The stylesheet body is currently empty, so this injects no rules.
st.markdown("""
""", unsafe_allow_html=True)

# Sentence-embedding model used to index and search the knowledge base.
retriever = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Groq chat client used by generate_response(). It has to exist at module
# level; without it every question fails with a NameError on `client`.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    # Fail early with a readable message instead of a traceback on first use.
    st.error("GROQ_API_KEY environment variable is not set; cannot contact the Groq API.")
    st.stop()
client = Groq(api_key=api_key)

# Knowledge base (documents) and embeddings.
# `documents` and `document_embeddings` must stay index-aligned: retrieve()
# maps a hit's corpus_id back into `documents`.
documents = [
    "Retrieval-Augmented Generation (RAG) is an AI framework that combines the strengths of retrieval-based and generative models.",
    "The main components of a RAG system are the retriever and the generator.",
    "A key benefit of Retrieval-Augmented Generation is that it can produce more accurate responses compared to standalone generative models.",
    "The retrieval process in a RAG system often relies on embedding-based models, like Sentence-BERT or DPR.",
    "Common use cases of RAG include chatbots, customer support systems, and knowledge retrieval for business intelligence.",
]
document_embeddings = retriever.encode(documents, convert_to_tensor=True)
def retrieve(query, top_k=1):
    """Return the first knowledge-base document matched for ``query``.

    Args:
        query: Text to search the knowledge base with.
        top_k: Number of hits requested from the semantic search. Only the
            first hit's document is returned regardless of this value.

    Returns:
        The document string of the first hit, or ``None`` when the search
        produced no hits.
    """
    query_vector = retriever.encode(query, convert_to_tensor=True)
    # semantic_search yields one hit list per query; only one query is sent.
    ranked_hits = util.semantic_search(query_vector, document_embeddings, top_k=top_k)[0]

    matched_docs = []
    for entry in ranked_hits:
        # corpus_id indexes into the module-level `documents` list.
        matched_docs.append(documents[entry['corpus_id']])

    if not ranked_hits:
        return None
    return matched_docs[0]
def generate_response(query, context):
    """Ask the chat model to answer ``query`` using ``context``.

    Args:
        query: The user's question.
        context: Retrieved text placed ahead of the question in the prompt.

    Returns:
        The message content of the first completion choice.
    """
    # Single-turn prompt: context first, then the question, then an answer cue.
    prompt = f"Context: {context} Question: {query} Answer:"
    # NOTE(review): the model id is hard-coded; confirm the API still serves it.
    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="gemma2-9b-it",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# Streamlit app layout
# The title text spans several lines, so it needs a triple-quoted literal;
# a single-quoted string cannot contain raw newlines and fails to parse.
# NOTE(review): the literal holds only plain text although HTML is enabled;
# any wrapping markup seems to be missing and should be restored if known.
st.markdown("""
DocumentsReader
""", unsafe_allow_html=True)
# About the App section
# Markdown body for the collapsible "About App" panel. The lines stay at
# column 0 so the rendered text is exactly what the literal contains.
about_markdown = """
### About the App: Document-Based RAG Question Answering
This application, developed by **Hamaad Ayub Khan**, combines state-of-the-art **Retrieval-Augmented Generation (RAG)** technology with powerful AI models to answer questions based on the content of uploaded documents.
**Key Features:**
- Advanced Retrieval System
- Generative Answering Capability
- Multi-format Document Support
- Seamless Knowledge Base Update
- Contextually Rich Answers
**Developer Information:** Hamaad Ayub Khan created this application with a commitment to making information retrieval simple, accurate, and accessible.
**Social Links:**
- [GitHub](https://github.com/hakgs1234)
- [LinkedIn](https://linkedin.com/in/hamaadayubkhan)
"""

# Write straight into the expander container.
about_panel = st.expander("About App")
about_panel.write(about_markdown)
# Document upload and knowledge base update
def _extract_text(uploaded):
    """Return the text of an uploaded file, or None for an unhandled type.

    Dispatches on ``uploaded.type`` (the MIME type reported by the uploader)
    and supports PDF, DOCX, PPTX and plain-text files.
    """
    mime = uploaded.type
    if mime == "application/pdf":
        # PdfReader has no document-level extract_text(); text is pulled per
        # page, and `or ""` guards against pages that yield nothing.
        return "\n".join(page.extract_text() or "" for page in PdfReader(uploaded).pages)
    if mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return "\n".join(para.text for para in Document(uploaded).paragraphs)
    if mime == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
        return "\n".join(
            shape.text
            for slide in Presentation(uploaded).slides
            for shape in slide.shapes
            if hasattr(shape, "text")  # not every shape carries text
        )
    if mime == "text/plain":
        # Replace undecodable bytes instead of aborting the whole upload.
        return uploaded.read().decode("utf-8", errors="replace")
    return None


uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "pptx", "txt"])
if uploaded_file:
    file_text = _extract_text(uploaded_file)
    if file_text is None:
        # The reported MIME type matched none of the handled formats.
        st.error(f"Unsupported file type: {uploaded_file.type}")
    elif not file_text.strip():
        # Do not index an empty entry (e.g. a PDF with no extractable text).
        st.warning("No extractable text was found in the uploaded document.")
    else:
        # The whole file becomes one knowledge-base entry; embeddings are
        # recomputed so they stay index-aligned with `documents`.
        # NOTE(review): long files may exceed the encoder's input limit;
        # consider chunking before indexing.
        documents.append(file_text)
        document_embeddings = retriever.encode(documents, convert_to_tensor=True)
        st.success("Document content successfully added to the knowledge base.")
# Question input and output handling
question = st.text_input("Enter your question:")

# Nothing is rendered until the user has typed a question. The answer is
# emitted after the input widget in script order.
if question:
    retrieved_context = retrieve(question)
    if retrieved_context:
        answer = generate_response(question, retrieved_context)
    else:
        # No context retrieved: skip the model call and use a fixed reply.
        answer = "I'm unable to find relevant information in the knowledge base."

    st.markdown("### Answer:")
    st.write(answer)