AlbertoFH98 committed on
Commit
4ec86c0
·
1 Parent(s): 887ecbd

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +75 -17
utils.py CHANGED
@@ -27,7 +27,7 @@ import os
27
  import re
28
 
29
  #os.environ["TOGETHER_API_KEY"] = "6101599d6e33e3bda336b8d007ca22e35a64c72cfd52c2d8197f663389fc50c5"
30
- os.environ["OPENAI_API_KEY"] = "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"
31
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
32
 
33
  client = Client()
@@ -198,28 +198,86 @@ def get_gpt_response(transcription_path, query, logger):
198
  | StrOutputParser()
199
  )
200
  llm_output = rag_chain.invoke(query)
201
- dataset = client.create_dataset(dataset_name="Sample LLM dataset", description="A dataset with LLM inputs and outputs", data_type="llm")
202
 
203
- client.create_example(
204
- inputs={"input": query},
205
- outputs={"output": llm_output},
206
- dataset_id=dataset.id,
207
- )
208
 
209
  # -- Run custom evaluator
210
- evaluation_config = RunEvalConfig(
211
- custom_evaluators = [RelevanceEvaluator()],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  )
213
- eval_output = run_on_dataset(
214
- dataset_name="Sample LLM dataset",
215
- llm_or_chain_factory=rag_chain,
216
- evaluation=evaluation_config,
217
- client=client,
 
 
 
 
 
 
218
  )
219
- logger.info("Eval output!!!!")
220
- logger.info(eval_output)
221
 
222
- return llm_output
 
 
 
 
 
223
 
224
  # -- Python function to setup basic features: SpaCy pipeline and LLM model
225
  @st.cache_resource
 
27
  import re
28
 
29
  #os.environ["TOGETHER_API_KEY"] = "6101599d6e33e3bda336b8d007ca22e35a64c72cfd52c2d8197f663389fc50c5"
30
+ #os.environ["OPENAI_API_KEY"] = "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"
31
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
32
 
33
  client = Client()
 
198
  | StrOutputParser()
199
  )
200
  llm_output = rag_chain.invoke(query)
201
+ # dataset = client.create_dataset(dataset_name="Sample LLM dataset", description="A dataset with LLM inputs and outputs", data_type="llm")
202
 
203
+ # client.create_example(
204
+ # inputs={"input": query},
205
+ # outputs={"output": llm_output},
206
+ # dataset_id=dataset.id,
207
+ # )
208
 
209
  # -- Run custom evaluator
210
+ # evaluation_config = RunEvalConfig(
211
+ # custom_evaluators = [RelevanceEvaluator()],
212
+ # )
213
+ # eval_output = run_on_dataset(
214
+ # dataset_name="Sample LLM dataset",
215
+ # llm_or_chain_factory=rag_chain,
216
+ # evaluation=evaluation_config,
217
+ # client=client,
218
+ # )
219
+ # logger.info("Eval output!!!!")
220
+ # logger.info(eval_output)
221
+
222
+ return llm_output
223
+
224
+ # -- Text summarisation with OpenAI (map-reduce technique)
225
+ def summarise_doc(transcription_path):
226
+ llm = ChatOpenAI(temperature=0)
227
+
228
+ # -- Map
229
+ loader = TextLoader(transcription_path)
230
+ docs = loader.load()
231
+ map_template = """Lo siguiente es listado de fragmentos de una conversacion:
232
+ {docs}
233
+ En base a este listado, por favor identifica los temas/topics principales.
234
+ Respuesta:"""
235
+ map_prompt = PromptTemplate.from_template(map_template)
236
+ map_chain = LLMChain(llm=llm, prompt=map_prompt)
237
+
238
+ # -- Reduce
239
+ reduce_template = """A continuacion se muestra un conjunto de resumenes:
240
+ {docs}
241
+ Usalos para crear un unico resumen consolidado de todos los temas/topics principales.
242
+ Respuesta:"""
243
+ reduce_prompt = PromptTemplate.from_template(reduce_template)
244
+
245
+ # Run chain
246
+ reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
247
+
248
+ # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
249
+ combine_documents_chain = StuffDocumentsChain(
250
+ llm_chain=reduce_chain, document_variable_name="docs"
251
+ )
252
+
253
+ # Combines and iteratively reduces the mapped documents
254
+ reduce_documents_chain = ReduceDocumentsChain(
255
+ # This is the final chain that is called.
256
+ combine_documents_chain=combine_documents_chain,
257
+ # If documents exceed context for `StuffDocumentsChain`
258
+ collapse_documents_chain=combine_documents_chain,
259
+ # The maximum number of tokens to group documents into.
260
+ token_max=4000,
261
  )
262
+
263
+ # Combining documents by mapping a chain over them, then combining results
264
+ map_reduce_chain = MapReduceDocumentsChain(
265
+ # Map chain
266
+ llm_chain=map_chain,
267
+ # Reduce chain
268
+ reduce_documents_chain=reduce_documents_chain,
269
+ # The variable name in the llm_chain to put the documents in
270
+ document_variable_name="docs",
271
+ # Return the results of the map steps in the output
272
+ return_intermediate_steps=False,
273
  )
 
 
274
 
275
+ text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
276
+ chunk_size=1000, chunk_overlap=0
277
+ )
278
+ split_docs = text_splitter.split_documents(docs)
279
+
280
+ return map_reduce_chain.run(split_docs)
281
 
282
  # -- Python function to setup basic features: SpaCy pipeline and LLM model
283
  @st.cache_resource