kwabs22 committed
Commit 7e4c949 · 1 Parent(s): 43647c4

RAG Placeholder demo test

Files changed (1)
  1. app.py +19 -3
app.py CHANGED
@@ -172,13 +172,29 @@ def llmguide_generate_response(prompt, stream=False):
 # torch.cuda.empty_cache()
 # gc.collect()
 
+# def rag(query, stream=False):
+#     retrieved_docs = retrieve(query)
+#     context = " ".join(retrieved_docs)
+#     prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
+#     return llmguide_generate_response(prompt, stream)
+
 def rag(query, stream=False):
     retrieved_docs = retrieve(query)
     context = " ".join(retrieved_docs)
     prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
-    return llmguide_generate_response(prompt, stream)
-
-
+
+    generator = llmguide_generate_response(prompt, stream)
+
+    if stream:
+        def stream_output():
+            for generated_text, tokens_per_second, ram_usage in generator:
+                yield generated_text, tokens_per_second, ram_usage
+        return stream_output()
+    else:
+        # For non-streaming, we just need to get the final output
+        for generated_text, tokens_per_second, ram_usage in generator:
+            pass  # This will iterate to the last yield
+        return generated_text, tokens_per_second, ram_usage
 
 #--------------------------------------------------------------------------------------------------------------------------------
 
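
Note: the diff above references retrieve() and llmguide_generate_response(), which are defined elsewhere in app.py and not shown in this hunk. The following is a minimal standalone sketch of the new rag() dispatch logic; the two stub functions are assumptions made only to exercise the streaming and non-streaming paths, not the app's actual retriever or generation code.

def retrieve(query):
    # Hypothetical stand-in for the app's document retriever.
    return ["Doc about RAG.", "Doc about streaming generators."]

def llmguide_generate_response(prompt, stream=False):
    # Hypothetical stand-in: yields (generated_text, tokens_per_second, ram_usage)
    # tuples, incrementally when stream=True, once with the full text otherwise.
    words = ["RAG", "combines", "retrieval", "with", "generation."]
    if stream:
        text = ""
        for i, w in enumerate(words, 1):
            text = (text + " " + w).strip()
            yield text, float(i), "1.2 GB"
    else:
        yield " ".join(words), float(len(words)), "1.2 GB"

def rag(query, stream=False):
    retrieved_docs = retrieve(query)
    context = " ".join(retrieved_docs)
    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"

    generator = llmguide_generate_response(prompt, stream)

    if stream:
        # Streaming: hand back a generator the caller can iterate over.
        def stream_output():
            for generated_text, tokens_per_second, ram_usage in generator:
                yield generated_text, tokens_per_second, ram_usage
        return stream_output()
    else:
        # Non-streaming: iterate to the last yield and return only the final tuple.
        for generated_text, tokens_per_second, ram_usage in generator:
            pass
        return generated_text, tokens_per_second, ram_usage

if __name__ == "__main__":
    # Streaming: consume partial outputs as they arrive.
    for text, tps, ram in rag("What is RAG?", stream=True):
        print(f"[stream] {text!r} ({tps} tok/s, {ram})")
    # Non-streaming: a single final (text, tokens/s, RAM) tuple.
    print(rag("What is RAG?", stream=False))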