import os
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from groq import Groq
from PyPDF2 import PdfReader
from docx import Document
from pptx import Presentation

# CSS styling for a professional look with black background
st.markdown(""" """, unsafe_allow_html=True)

# Initialize retriever and Groq client
retriever = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=api_key)

# Knowledge base (documents) and embeddings
documents = [
    "Retrieval-Augmented Generation (RAG) is an AI framework that combines the strengths of retrieval-based and generative models.",
    "The main components of a RAG system are the retriever and the generator.",
    "A key benefit of Retrieval-Augmented Generation is that it can produce more accurate responses compared to standalone generative models.",
    "The retrieval process in a RAG system often relies on embedding-based models, like Sentence-BERT or DPR.",
    "Common use cases of RAG include chatbots, customer support systems, and knowledge retrieval for business intelligence."
]
document_embeddings = retriever.encode(documents, convert_to_tensor=True)

def retrieve(query, top_k=1):
    """Return the most similar document for `query`, or None if nothing is found."""
    query_embedding = retriever.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, document_embeddings, top_k=top_k)
    top_docs = [documents[hit['corpus_id']] for hit in hits[0]]
    return top_docs[0] if hits[0] else None

def generate_response(query, context):
    """Ask the Groq-hosted model to answer `query` using the retrieved `context`."""
    response = client.chat.completions.create(
        messages=[{
            "role": "user",
            "content": f"Context: {context} Question: {query} Answer:"
        }],
        model="gemma2-9b-it"
    )
    return response.choices[0].message.content

# Streamlit app layout
st.markdown('<h1>DocumentsReader</h1>', unsafe_allow_html=True)

# About the App section
with st.expander("About App"):
    st.write("""
    ### About the App: Document-Based RAG Question Answering
    This application, developed by **Hamaad Ayub Khan**, combines state-of-the-art **Retrieval-Augmented Generation (RAG)** technology with powerful AI models to answer questions based on the content of uploaded documents.

    **Key Features:**
    - Advanced Retrieval System
    - Generative Answering Capability
    - Multi-format Document Support
    - Seamless Knowledge Base Update
    - Contextually Rich Answers

    **Developer Information:**
    Hamaad Ayub Khan created this application with a commitment to making information retrieval simple, accurate, and accessible.

    **Social Links:**
    - [GitHub](https://github.com/hakgs1234)
    - [LinkedIn](https://linkedin.com/in/hamaadayubkhan)
    """)

# Document upload and knowledge base update
uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "pptx", "txt"])
if uploaded_file:
    if uploaded_file.type == "application/pdf":
        # PdfReader extracts text per page, so join the pages' text.
        file_text = "\n".join(page.extract_text() or "" for page in PdfReader(uploaded_file).pages)
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        file_text = "\n".join([para.text for para in Document(uploaded_file).paragraphs])
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
        file_text = "\n".join([shape.text for slide in Presentation(uploaded_file).slides
                               for shape in slide.shapes if hasattr(shape, "text")])
    elif uploaded_file.type == "text/plain":
        file_text = uploaded_file.read().decode("utf-8")

    documents.append(file_text)
    document_embeddings = retriever.encode(documents, convert_to_tensor=True)
    st.success("Document content successfully added to the knowledge base.")

# Question input and answer handling
question = st.text_input("Enter your question:")

if question:
    # Retrieve the most relevant context and generate an answer from it
    retrieved_context = retrieve(question)
    answer = (
        generate_response(question, retrieved_context)
        if retrieved_context
        else "I'm unable to find relevant information in the knowledge base."
    )

    # Display the answer below the question field
    st.markdown("### Answer:")
    st.write(answer)
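
# --- Optional helper (not part of the original app): a minimal sketch showing how the two
# --- RAG stages above compose into a single call. Defining it does not change the app's
# --- behavior; it is only convenient for exercising the pipeline outside the Streamlit UI
# --- (e.g. in a Python shell), assuming GROQ_API_KEY is set in the environment. The name
# --- `answer_question` is hypothetical. To launch the app itself, run
# --- `streamlit run <this file>` (filename assumed).
def answer_question(query: str) -> str:
    """Hypothetical wrapper: retrieve the best-matching document, then generate an answer."""
    context = retrieve(query)
    if context is None:
        return "I'm unable to find relevant information in the knowledge base."
    return generate_response(query, context)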