Ritesh-hf committed on
Commit
1c806b3
·
1 Parent(s): 38f2a07

update index

Browse files
Files changed (7) hide show
  1. .gitignore +3 -0
  2. Dockerfile +1 -1
  3. UAE-NLA.json +0 -0
  4. app.py +107 -93
  5. requirements.txt +6 -7
  6. templates/chat.html +2 -16
  7. updated-traveler.json +0 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ *.ipynb
3
+ __pycache__/*
Dockerfile CHANGED
@@ -13,4 +13,4 @@ COPY --chown=user ./requirements.txt requirements.txt
13
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
  COPY --chown=user . /app
16
- CMD ["gunicorn", "-b", "0.0.0.0:7860", "-k", "geventwebsocket.gunicorn.workers.GeventWebSocketWorker", "-w", "1", "app:app"]
 
13
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
  COPY --chown=user . /app
16
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
UAE-NLA.json DELETED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -1,15 +1,13 @@
1
- from gevent import monkey
2
- monkey.patch_all()
3
-
4
  import nltk
5
  nltk.download('punkt_tab')
6
 
7
  import os
8
  from dotenv import load_dotenv
9
  import asyncio
10
- from flask import Flask, request, render_template
11
- from flask_cors import CORS
12
- from flask_socketio import SocketIO, emit, join_room, leave_room
 
13
  from langchain.chains import create_history_aware_retriever, create_retrieval_chain
14
  from langchain.chains.combine_documents import create_stuff_documents_chain
15
  from langchain_community.chat_message_histories import ChatMessageHistory
@@ -20,10 +18,12 @@ from pinecone import Pinecone
20
  from pinecone_text.sparse import BM25Encoder
21
  from langchain_huggingface import HuggingFaceEmbeddings
22
  from langchain_community.retrievers import PineconeHybridSearchRetriever
23
- from langchain_groq import ChatGroq
24
  from langchain.retrievers import ContextualCompressionRetriever
25
- from langchain.retrievers.document_compressors import FlashrankRerank
26
  from langchain_community.chat_models import ChatPerplexity
 
 
 
 
27
 
28
  # Load environment variables
29
  load_dotenv(".env")
@@ -38,14 +38,19 @@ os.environ['USER_AGENT'] = USER_AGENT
38
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
39
  os.environ["TOKENIZERS_PARALLELISM"] = 'true'
40
 
41
- # Initialize Flask app and SocketIO with CORS
42
- app = Flask(__name__)
43
- CORS(app)
44
- socketio = SocketIO(app, async_mode='gevent', cors_allowed_origins="*")
45
- app.config['SESSION_COOKIE_SECURE'] = True # Use HTTPS
46
- app.config['SESSION_COOKIE_HTTPONLY'] = True
47
- app.config['SESSION_COOKIE_SAMESITE'] = 'Lax'
48
- app.config['SECRET_KEY'] = SECRET_KEY
 
 
 
 
 
49
 
50
  # Function to initialize Pinecone connection
51
  def initialize_pinecone(index_name: str):
@@ -56,15 +61,13 @@ def initialize_pinecone(index_name: str):
56
  print(f"Error initializing Pinecone: {e}")
57
  raise
58
 
59
-
60
  ##################################################
61
  ## Change down here
62
  ##################################################
63
 
64
  # Initialize Pinecone index and BM25 encoder
65
- pinecone_index = initialize_pinecone("traveler-demo-website-vectorstore")
66
- bm25 = BM25Encoder().load("./bm25_traveler_website.json")
67
-
68
 
69
  ##################################################
70
  ##################################################
@@ -80,11 +83,12 @@ retriever = PineconeHybridSearchRetriever(
80
  )
81
 
82
  # Initialize LLM
83
- # llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, max_tokens=1024, max_retries=2)
84
- llm = ChatPerplexity(temperature=0, pplx_api_key=GROQ_API_KEY, model="llama-3.1-70b-instruct", max_tokens=1024, max_retries=2)
85
 
86
  # Initialize Reranker
87
- compressor = FlashrankRerank()
 
 
88
  compression_retriever = ContextualCompressionRetriever(
89
  base_compressor=compressor, base_retriever=retriever
90
  )
@@ -105,33 +109,31 @@ contextualize_q_prompt = ChatPromptTemplate.from_messages(
105
  history_aware_retriever = create_history_aware_retriever(llm, compression_retriever, contextualize_q_prompt)
106
 
107
  # QA system prompt and chain
108
- qa_system_prompt = """ You are a highly skilled information retrieval assistant. Use the following context to answer questions effectively. \
109
- If you don't know the answer, simply state that you don't know. \
110
- Your answer should be in {language} language. \
111
- Provide answers in proper HTML format and keep them concise. \
112
-
113
- When responding to queries, follow these guidelines: \
114
-
115
- 1. Provide Clear Answers: \
116
- - Ensure the response directly addresses the query with accurate and relevant information.\
117
- - Only use information from the provided context. Using outside information may result in the termination of the chat.
118
-
119
- 2. Include Detailed References: \
120
- - Links to Sources: Include URLs to credible sources where users can verify information or explore further. \
121
- - Reference Sites: Mention specific websites or platforms that offer additional information. \
122
- - Downloadable Materials: Provide links to any relevant downloadable resources if applicable. \
123
-
124
- 3. Formatting for Readability: \
125
- - The answer should be in a proper HTML format with appropriate tags. \
126
- - For Arabic language response align the text to the right and convert numbers.
127
- - Double-check if the language of the answer is correct or not.
128
- - Use bullet points or numbered lists where applicable to present information. \
129
- - Highlight key details using bold or italics. \
130
- - Provide proper and meaningful abbreviations for URLs. Do not include naked URLs. \
131
-
132
- 4. Organize Content Logically: \
133
- - Structure the content logically, ensuring easy navigation and understanding for the user. \
134
-
135
  {context}
136
  """
137
  qa_prompt = ChatPromptTemplate.from_messages(
@@ -141,7 +143,9 @@ qa_prompt = ChatPromptTemplate.from_messages(
141
  ("human", "{input}")
142
  ]
143
  )
144
- question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
 
 
145
 
146
  # Retrieval and Generative (RAG) Chain
147
  rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
@@ -149,9 +153,6 @@ rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chai
149
  # Chat message history storage
150
  store = {}
151
 
152
- def clean_temporary_data():
153
- store.clear()
154
-
155
  def get_session_history(session_id: str) -> BaseChatMessageHistory:
156
  if session_id not in store:
157
  store[session_id] = ChatMessageHistory()
@@ -167,46 +168,59 @@ conversational_rag_chain = RunnableWithMessageHistory(
167
  output_messages_key="answer",
168
  )
169
 
170
- # Function to handle WebSocket connection
171
- @socketio.on('connect')
172
- def handle_connect():
173
- print(f"Client connected: {request.sid}")
174
- emit('connection_response', {'message': 'Connected successfully.'})
175
-
176
- # Function to handle WebSocket disconnection
177
- @socketio.on('disconnect')
178
- def handle_disconnect():
179
- print(f"Client disconnected: {request.sid}")
180
- clean_temporary_data()
181
-
182
- # Function to handle WebSocket messages
183
- @socketio.on('message')
184
- def handle_message(data):
185
- question = data.get('question')
186
- language = data.get('language')
187
- if "en" in language:
188
- language = "English"
189
- else:
190
- language = "Arabic"
191
- session_id = data.get('session_id', SESSION_ID_DEFAULT)
192
- chain = conversational_rag_chain.pick("answer")
193
 
 
 
 
 
 
 
194
  try:
195
- for chunk in chain.stream(
196
- {"input": question, 'language': language},
197
- config={"configurable": {"session_id": session_id}},
198
- ):
199
- emit('response', chunk, room=request.sid)
200
- except Exception as e:
201
- print(f"Error during message handling: {e}")
202
- emit('response', {"error": "An error occurred while processing your request."}, room=request.sid)
203
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
  # Home route
206
- @app.route("/")
207
- def index_view():
208
- return render_template('chat.html')
209
-
210
- # Main function to run the app
211
- if __name__ == '__main__':
212
- socketio.run(app, debug=True)
 
 
 
 
1
  import nltk
2
  nltk.download('punkt_tab')
3
 
4
  import os
5
  from dotenv import load_dotenv
6
  import asyncio
7
+ from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
8
+ from fastapi.responses import HTMLResponse
9
+ from fastapi.templating import Jinja2Templates
10
+ from fastapi.middleware.cors import CORSMiddleware
11
  from langchain.chains import create_history_aware_retriever, create_retrieval_chain
12
  from langchain.chains.combine_documents import create_stuff_documents_chain
13
  from langchain_community.chat_message_histories import ChatMessageHistory
 
18
  from pinecone_text.sparse import BM25Encoder
19
  from langchain_huggingface import HuggingFaceEmbeddings
20
  from langchain_community.retrievers import PineconeHybridSearchRetriever
 
21
  from langchain.retrievers import ContextualCompressionRetriever
 
22
  from langchain_community.chat_models import ChatPerplexity
23
+ from langchain.retrievers.document_compressors import CrossEncoderReranker
24
+ from langchain_community.cross_encoders import HuggingFaceCrossEncoder
25
+ from langchain_core.prompts import PromptTemplate
26
+ import re
27
 
28
  # Load environment variables
29
  load_dotenv(".env")
 
38
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
39
  os.environ["TOKENIZERS_PARALLELISM"] = 'true'
40
 
41
+ # Initialize FastAPI app and CORS
42
+ app = FastAPI()
43
+ origins = ["*"] # Adjust as needed
44
+
45
+ app.add_middleware(
46
+ CORSMiddleware,
47
+ allow_origins=origins,
48
+ allow_credentials=True,
49
+ allow_methods=["*"],
50
+ allow_headers=["*"],
51
+ )
52
+
53
+ templates = Jinja2Templates(directory="templates")
54
 
55
  # Function to initialize Pinecone connection
56
  def initialize_pinecone(index_name: str):
 
61
  print(f"Error initializing Pinecone: {e}")
62
  raise
63
 
 
64
  ##################################################
65
  ## Change down here
66
  ##################################################
67
 
68
  # Initialize Pinecone index and BM25 encoder
69
+ pinecone_index = initialize_pinecone("updated-traveler")
70
+ bm25 = BM25Encoder().load("./updated-traveler.json")
 
71
 
72
  ##################################################
73
  ##################################################
 
83
  )
84
 
85
  # Initialize LLM
86
+ llm = ChatPerplexity(temperature=0, pplx_api_key=GROQ_API_KEY, model="llama-3.1-sonar-large-128k-chat", max_tokens=512, max_retries=2)
 
87
 
88
  # Initialize Reranker
89
+ model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
90
+ compressor = CrossEncoderReranker(model=model, top_n=20)
91
+
92
  compression_retriever = ContextualCompressionRetriever(
93
  base_compressor=compressor, base_retriever=retriever
94
  )
 
109
  history_aware_retriever = create_history_aware_retriever(llm, compression_retriever, contextualize_q_prompt)
110
 
111
  # QA system prompt and chain
112
# System prompt for the answering step of the RAG chain.
# It carries exactly two template placeholders: {language} (the websocket
# handler passes 'language' in the chain input) and {context} (presumably
# filled with the retrieved documents by the stuff-documents chain -- confirm
# against the chain construction).
# Spelling in the instructions has been corrected; the instructions themselves,
# including the [Source N] citation format, are unchanged.
qa_system_prompt = """ You are a highly skilled information retrieval assistant. Use the following context to answer questions effectively.
If you don't know the answer, simply state that you don't know.
Your answer should be in {language} language.

When responding to queries, follow these guidelines:

1. Provide Clear Answers:
- Based on the language of the question, you have to answer in that language. E.g., if the question is in English, then answer in English; if the question is in Arabic, you should answer in Arabic.
- Ensure the response directly addresses the query with accurate and relevant information.
- Do not give long answers. Provide detailed but concise responses.

2. Formatting for Readability:
- Provide the entire response in proper markdown format.
- Use structured Markdown elements such as headings, subheadings, lists, tables, and links.
- Use emphasis on headings, important text and phrases.

3. Proper Citations:
- ALWAYS USE INLINE CITATIONS with embedded source URLs where users can verify information or explore further.
- The inline citations should be in the format [Source 1], [Source 2], etc. where on clicking on this the user should be redirected to the source url in a new tab.
- Do not include references at the end of response.

FOLLOW ALL THE GIVEN INSTRUCTIONS, FAILURE TO DO SO WILL RESULT IN TERMINATION OF THE CHAT.

== CONTEXT ==

{context}
"""
139
  qa_prompt = ChatPromptTemplate.from_messages(
 
143
  ("human", "{input}")
144
  ]
145
  )
146
+
147
+ document_prompt = PromptTemplate(input_variables=["page_content", "source"], template="{page_content} \n\n Source: {source}")
148
+ question_answer_chain = create_stuff_documents_chain(llm, qa_prompt, document_prompt=document_prompt)
149
 
150
  # Retrieval and Generative (RAG) Chain
151
  rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
 
153
  # Chat message history storage
154
  store = {}
155
 
 
 
 
156
  def get_session_history(session_id: str) -> BaseChatMessageHistory:
157
  if session_id not in store:
158
  store[session_id] = ChatMessageHistory()
 
168
  output_messages_key="answer",
169
  )
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
+ # WebSocket endpoint with streaming
173
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Serve one chat client over a WebSocket, streaming RAG answers.

    Each incoming JSON message is read for the keys ``question``,
    ``language`` and (optionally) ``session_id``. Answer fragments are sent
    back as ``{'response': <text>}`` messages as the chain yields them. When
    the chain also yields retrieved documents, a final
    ``{'sources': {...}}`` message maps the citation markers found in the
    answer to the ``source`` metadata of the matching documents.

    The chat history of the last session id seen on this connection is
    removed from ``store`` when the handler exits, whatever the reason.
    """
    await websocket.accept()
    print(f"Client connected: {websocket.client}")
    session_id = None
    try:
        while True:
            data = await websocket.receive_json()
            question = data.get('question')
            # A missing or non-string language must not raise on the `in`
            # test below; like every other value without "en" it selects
            # Arabic, which is the handler's existing fallback branch.
            requested_language = str(data.get('language') or "")
            language = "English" if "en" in requested_language else "Arabic"
            session_id = data.get('session_id', SESSION_ID_DEFAULT)
            try:
                answer_parts = []
                context = []
                async for chunk in conversational_rag_chain.astream(
                    {"input": question, 'language': language},
                    config={"configurable": {"session_id": session_id}},
                ):
                    if "context" in chunk:
                        context = chunk['context']
                    # Forward each answer fragment as soon as it arrives.
                    if "answer" in chunk:
                        answer_parts.append(chunk['answer'])
                        await websocket.send_json({'response': chunk['answer']})

                if context:
                    sources = _cited_sources("".join(answer_parts), context)
                    await websocket.send_json({'sources': sources})
            except WebSocketDisconnect:
                # The peer is gone: do not try to send an error message on a
                # closed socket, let the outer handler deal with it.
                raise
            except Exception as e:
                print(f"Error during message handling: {e}")
                await websocket.send_json({'response': "Something went wrong, Please try again.."})
    except WebSocketDisconnect:
        print(f"Client disconnected: {websocket.client}")
    finally:
        # Drop the session history on every exit path, not only on a clean
        # disconnect, so a failed connection does not leave it behind.
        if session_id:
            store.pop(session_id, None)


# Matches both bare "[3]" markers and the "[Source 3]" form that the QA system
# prompt asks the model to use.
_CITATION_RE = re.compile(r'\[(?:source\s*)?(\d+)\]', re.IGNORECASE)


def _cited_sources(answer, docs):
    """Map citation markers used in *answer* to the sources of *docs*.

    Citation number N is taken to refer to the N-th retrieved document
    (1-based) -- NOTE(review): the documents are not numbered when they are
    formatted for the model, so this correspondence is an assumption carried
    over from the previous implementation; confirm.

    Returns a dict keyed ``"[N]"`` whose values are the documents' ``source``
    metadata. Documents without a ``source`` entry are skipped.
    """
    cited = {int(number) for number in _CITATION_RE.findall(answer)}
    sources = {}
    for position, doc in enumerate(docs, start=1):
        if position not in cited:
            continue
        source = doc.metadata.get("source")
        if source is not None:
            sources[f"[{position}]"] = source
    return sources
222
 
223
  # Home route
224
@app.get("/", response_class=HTMLResponse)
async def read_index(request: Request):
    """Render ``chat.html`` from the templates directory for the home route."""
    # The incoming request is handed to the template under the "request" key.
    template_context = {"request": request}
    return templates.TemplateResponse("chat.html", template_context)
 
 
 
 
requirements.txt CHANGED
@@ -5,10 +5,9 @@ langchain-huggingface
5
  pinecone
6
  pinecone-text
7
  flashrank
8
- flask
9
- flask-cors
10
- flask-socketio
11
- gunicorn
12
- gevent
13
- gevent-websocket
14
- openai
 
5
  pinecone
6
  pinecone-text
7
  flashrank
8
+ fastapi>=0.68.0
9
+ uvicorn[standard]>=0.15.0
10
+ websockets>=10.0
11
+ python-multipart>=0.0.5
12
+ openai
13
+ einops
 
templates/chat.html CHANGED
@@ -4,24 +4,10 @@
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
  <title>Chat with LLM</title>
7
- <link rel="stylesheet" href="../static/styles.css">
8
  </head>
9
  <body>
10
- <h3>This is a RAG application over the website <a href="https://omkar0896.pythonanywhere.com/">https://omkar0896.pythonanywhere.com/</a></h3>
11
- <div class="chat-container">
12
- <div class="chat-box" id="chat-box">
13
-
14
-
15
-
16
- </div>
17
- <div class="input-container">
18
- <input type="text" id="chat-input" placeholder="Type your message here...">
19
- <button id="send-button">Send</button>
20
- </div>
21
  </div>
22
- <script src="https://cdn.socket.io/4.5.0/socket.io.min.js"></script>
23
- <script src="https://cdnjs.cloudflare.com/ajax/libs/showdown/2.1.0/showdown.min.js" integrity="sha512-LhccdVNGe2QMEfI3x4DVV3ckMRe36TfydKss6mJpdHjNFiV07dFpS2xzeZedptKZrwxfICJpez09iNioiSZ3hA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
24
-
25
- <script src="../static/script.js"></script>
26
  </body>
27
  </html>
 
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
  <title>Chat with LLM</title>
 
7
  </head>
8
  <body>
9
+ <div style="width: 100%; height: 100vh; display: flex; align-items: center; justify-content: center;">
10
+ <h3>This is a demo</h3>
 
 
 
 
 
 
 
 
 
11
  </div>
 
 
 
 
12
  </body>
13
  </html>
updated-traveler.json ADDED
The diff for this file is too large to render. See raw diff