Commit 4ec86c0 · 1 Parent(s): 887ecbd
AlbertoFH98 committed: Update utils.py
utils.py CHANGED

@@ -27,7 +27,7 @@ import os
 import re
 
 #os.environ["TOGETHER_API_KEY"] = "6101599d6e33e3bda336b8d007ca22e35a64c72cfd52c2d8197f663389fc50c5"
-os.environ["OPENAI_API_KEY"] = "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"
+#os.environ["OPENAI_API_KEY"] = "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"
 os.environ["LANGCHAIN_TRACING_V2"] = "true"
 
 client = Client()
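Note: the hunk above only comments out a hardcoded OpenAI key (the TOGETHER_API_KEY line above it was already commented out). Since both keys remain visible in the git history, they should also be revoked. A minimal sketch of the usual replacement, assuming the key is supplied as an environment variable or Hugging Face Spaces secret rather than committed to the repo:

    import os

    # Read the key from the environment (e.g. a Hugging Face Spaces secret)
    # instead of hardcoding it in the source tree.
    openai_key = os.environ.get("OPENAI_API_KEY")
    if openai_key is None:
        raise RuntimeError("OPENAI_API_KEY is not set; configure it as a secret.")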
@@ -198,28 +198,86 @@ def get_gpt_response(transcription_path, query, logger):
         | StrOutputParser()
     )
     llm_output = rag_chain.invoke(query)
-    dataset = client.create_dataset(dataset_name="Sample LLM dataset", description="A dataset with LLM inputs and outputs", data_type="llm")
+    # dataset = client.create_dataset(dataset_name="Sample LLM dataset", description="A dataset with LLM inputs and outputs", data_type="llm")
 
-    client.create_example(
-        inputs={"input": query},
-        outputs={"output": llm_output},
-        dataset_id=dataset.id,
-    )
+    # client.create_example(
+    #     inputs={"input": query},
+    #     outputs={"output": llm_output},
+    #     dataset_id=dataset.id,
+    # )
 
     # -- Run custom evaluator
-    evaluation_config = RunEvalConfig(
-        custom_evaluators = [RelevanceEvaluator()],
-    )
-    eval_output = run_on_dataset(
-        dataset_name="Sample LLM dataset",
-        llm_or_chain_factory=rag_chain,
-        evaluation=evaluation_config,
-        client=client,
-    )
-    logger.info("Eval output!!!!")
-    logger.info(eval_output)
+    # evaluation_config = RunEvalConfig(
+    #     custom_evaluators = [RelevanceEvaluator()],
+    # )
+    # eval_output = run_on_dataset(
+    #     dataset_name="Sample LLM dataset",
+    #     llm_or_chain_factory=rag_chain,
+    #     evaluation=evaluation_config,
+    #     client=client,
+    # )
+    # logger.info("Eval output!!!!")
+    # logger.info(eval_output)
 
     return llm_output
+
+# -- Text summarisation with OpenAI (map-reduce technique)
+def summarise_doc(transcription_path):
+    llm = ChatOpenAI(temperature=0)
+
+    # -- Map
+    loader = TextLoader(transcription_path)
+    docs = loader.load()
+    map_template = """Lo siguiente es listado de fragmentos de una conversacion:
+    {docs}
+    En base a este listado, por favor identifica los temas/topics principales.
+    Respuesta:"""
+    map_prompt = PromptTemplate.from_template(map_template)
+    map_chain = LLMChain(llm=llm, prompt=map_prompt)
+
+    # -- Reduce
+    reduce_template = """A continuacion se muestra un conjunto de resumenes:
+    {docs}
+    Usalos para crear un unico resumen consolidado de todos los temas/topics principales.
+    Respuesta:"""
+    reduce_prompt = PromptTemplate.from_template(reduce_template)
+
+    # Run chain
+    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
+
+    # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
+    combine_documents_chain = StuffDocumentsChain(
+        llm_chain=reduce_chain, document_variable_name="docs"
+    )
+
+    # Combines and iteratively reduces the mapped documents
+    reduce_documents_chain = ReduceDocumentsChain(
+        # This is the final chain that is called.
+        combine_documents_chain=combine_documents_chain,
+        # If documents exceed context for `StuffDocumentsChain`
+        collapse_documents_chain=combine_documents_chain,
+        # The maximum number of tokens to group documents into.
+        token_max=4000,
+    )
+
+    # Combining documents by mapping a chain over them, then combining results
+    map_reduce_chain = MapReduceDocumentsChain(
+        # Map chain
+        llm_chain=map_chain,
+        # Reduce chain
+        reduce_documents_chain=reduce_documents_chain,
+        # The variable name in the llm_chain to put the documents in
+        document_variable_name="docs",
+        # Do not return the results of the map steps in the output
+        return_intermediate_steps=False,
+    )
+
+    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
+        chunk_size=1000, chunk_overlap=0
+    )
+    split_docs = text_splitter.split_documents(docs)
+
+    return map_reduce_chain.run(split_docs)
 
 # -- Python function to setup basic features: SpaCy pipeline and LLM model
 @st.cache_resource
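The LangSmith dataset and evaluator calls in get_gpt_response are commented out rather than deleted, so the evaluation flow can be restored later. RelevanceEvaluator is referenced but not defined in this diff; a minimal sketch of what such a custom evaluator could look like, assuming the langsmith SDK's RunEvaluator interface (the grading logic here is illustrative, not the Space's actual implementation):

    from langsmith.evaluation import EvaluationResult, RunEvaluator

    class RelevanceEvaluator(RunEvaluator):
        # Illustrative heuristic: score what fraction of the query's longer
        # words reappear in the chain's output.
        def evaluate_run(self, run, example=None) -> EvaluationResult:
            query = str((run.inputs or {}).get("input", ""))
            output = str((run.outputs or {}).get("output", ""))
            words = [w for w in query.lower().split() if len(w) > 3]
            hits = sum(w in output.lower() for w in words)
            score = hits / len(words) if words else 0.0
            return EvaluationResult(key="relevance", score=score)

With a definition like that in scope, the commented-out block would run as written: run_on_dataset executes rag_chain over every example in "Sample LLM dataset" and records a relevance score for each run.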
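The new summarise_doc function applies LangChain's map-reduce summarisation pattern: the transcription is split into 1000-token chunks, the model lists each chunk's main topics (the map step, with a Spanish prompt), and the partial summaries are then consolidated, collapsing them in token-bounded groups whenever they exceed token_max=4000 (the reduce step). Stripped of the framework, the idea reduces to the following sketch, where llm stands for any text-in/text-out callable and the English prompts paraphrase the committed Spanish ones:

    from typing import Callable, List

    def map_reduce_summarise(chunks: List[str], llm: Callable[[str], str],
                             batch_size: int = 10) -> str:
        # Map: list the main topics of each chunk independently.
        partials = [llm("Identify the main topics in this fragment:\n" + chunk)
                    for chunk in chunks]
        # Reduce: repeatedly collapse groups of partial summaries until one
        # remains. (The committed chain groups by token count via
        # token_max=4000, not by item count.)
        while len(partials) > 1:
            groups = [partials[i:i + batch_size]
                      for i in range(0, len(partials), batch_size)]
            partials = [llm("Consolidate these summaries into a single summary:\n"
                            + "\n".join(group)) for group in groups]
        return partials[0] if partials else ""

A hypothetical call against the committed function, with an illustrative path:

    summary = summarise_doc("transcriptions/episode_01.txt")

Note that summarise_doc depends on ChatOpenAI, TextLoader, PromptTemplate, LLMChain, StuffDocumentsChain, ReduceDocumentsChain, MapReduceDocumentsChain and CharacterTextSplitter being imported earlier in utils.py, and on OPENAI_API_KEY now coming from the environment, since this commit disables the hardcoded key.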