Spaces: Running on Zero

kwabs22 committed 7e4c949 · Parent(s): 43647c4
RAG Placeholder demo test

app.py CHANGED
@@ -172,13 +172,29 @@ def llmguide_generate_response(prompt, stream=False):
 # torch.cuda.empty_cache()
 # gc.collect()
 
+# def rag(query, stream=False):
+#     retrieved_docs = retrieve(query)
+#     context = " ".join(retrieved_docs)
+#     prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
+#     return llmguide_generate_response(prompt, stream)
+
 def rag(query, stream=False):
     retrieved_docs = retrieve(query)
     context = " ".join(retrieved_docs)
     prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
-    return llmguide_generate_response(prompt, stream)
-
-
+
+    generator = llmguide_generate_response(prompt, stream)
+
+    if stream:
+        def stream_output():
+            for generated_text, tokens_per_second, ram_usage in generator:
+                yield generated_text, tokens_per_second, ram_usage
+        return stream_output()
+    else:
+        # For non-streaming, we just need to get the final output
+        for generated_text, tokens_per_second, ram_usage in generator:
+            pass  # This will iterate to the last yield
+        return generated_text, tokens_per_second, ram_usage
 
 #--------------------------------------------------------------------------------------------------------------------------------
 
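For reference, a minimal standalone sketch of the new rag() flow. The retrieve() and llmguide_generate_response() helpers are defined elsewhere in app.py; the stubs below are assumptions for illustration only, matching just the shapes the diff relies on (a list of documents, and a generator yielding (text, tokens/sec, RAM) tuples), not the Space's actual implementations.

# Standalone sketch of the rag() change above.
# retrieve() and llmguide_generate_response() are assumed stand-ins
# for the real helpers in app.py, kept only shape-compatible.

def retrieve(query):
    # Placeholder retriever: returns a fixed document list.
    return ["Doc about " + query]

def llmguide_generate_response(prompt, stream=False):
    # Placeholder generator: yields (text, tokens/sec, RAM) tuples,
    # the tuple shape rag() iterates over. The stub ignores `stream`.
    for chunk in ("partial answer", "final answer"):
        yield chunk, 42.0, "1.2 GB"

def rag(query, stream=False):
    retrieved_docs = retrieve(query)
    context = " ".join(retrieved_docs)
    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
    generator = llmguide_generate_response(prompt, stream)
    if stream:
        def stream_output():
            for generated_text, tokens_per_second, ram_usage in generator:
                yield generated_text, tokens_per_second, ram_usage
        return stream_output()
    else:
        for generated_text, tokens_per_second, ram_usage in generator:
            pass  # iterate to the last yield
        return generated_text, tokens_per_second, ram_usage

# Non-streaming: one final (text, tokens/sec, RAM) tuple.
print(rag("what is RAG?"))

# Streaming: consume intermediate tuples as they arrive.
for text, tps, ram in rag("what is RAG?", stream=True):
    print(text, tps, ram)

One caveat worth noting: the non-streaming branch reads the loop variables after the for loop ends, so it raises UnboundLocalError if the generator yields nothing; initializing defaults before the loop would make the placeholder more robust.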