Spaces: AlbertoFH98 (Runtime error)
AlbertoFH98 committed · Commit 1974d6c · 1 parent: 17745cd

Update utils.py

utils.py CHANGED

@@ -13,6 +13,11 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains import RetrievalQA
 from langchain.document_loaders import TextLoader
 from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings
+from langchain.chains import LLMChain
+from langchain.evaluation import StringEvaluator
+from typing import Any, Optional
+from langsmith import Client
+from langchain.smith import RunEvalConfig, run_on_dataset
 import streamlit as st
 import together
 import textwrap
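One dependency of the new code does not appear among the added imports: RelevanceEvaluator.__init__ below instantiates ChatOpenAI. If utils.py does not already import it somewhere in the unchanged lines (which are not shown in this diff), a line like the following would be needed; a sketch, using the module path LangChain exposed at the time of these imports:

from langchain.chat_models import ChatOpenAI  # assumed import, used by RelevanceEvaluator below
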
@@ -24,6 +29,7 @@ import re
 #os.environ["TOGETHER_API_KEY"] = "6101599d6e33e3bda336b8d007ca22e35a64c72cfd52c2d8197f663389fc50c5"
 #os.environ["OPENAI_API_KEY"] = "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"
 os.environ["LANGCHAIN_TRACING_V2"] = "true"
+client = Client()
 
 # -- LLM class
 class TogetherLLM(LLM):
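client = Client() is constructed at import time with no explicit credentials, so the langsmith client falls back to environment variables. LANGCHAIN_TRACING_V2 is already set above; the API key (and optionally the endpoint) must also be present before this line runs. A minimal sketch of the assumed environment, with a placeholder key:

import os

# Assumed LangSmith configuration; values are placeholders, not real credentials.
os.environ["LANGCHAIN_API_KEY"] = "<your-langsmith-api-key>"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"  # default endpoint

from langsmith import Client

client = Client()  # reads LANGCHAIN_API_KEY / LANGCHAIN_ENDPOINT from the environment
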
@@ -114,6 +120,52 @@ PREGUNTA:""", cleaned_prompt, re.DOTALL)
         text = self.clean_duplicates(text)
         return text
 
+# -- LangChain evaluator
+class RelevanceEvaluator(StringEvaluator):
+    """An LLM-based relevance evaluator."""
+
+    def __init__(self):
+        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
+
+        template = """En una escala del 0 al 100, ¿cómo de relevante es la siguiente salida con respecto a la siguiente entrada?
+--------
+ENTRADA: {input}
+--------
+SALIDA: {prediction}
+--------
+Razona paso a paso por qué la puntuación que has elegido es apropiada y después muestra la puntuación al final."""
+
+        self.eval_chain = LLMChain.from_string(llm=llm, template=template)
+
+    @property
+    def requires_input(self) -> bool:
+        return True
+
+    @property
+    def requires_reference(self) -> bool:
+        return False
+
+    @property
+    def evaluation_name(self) -> str:
+        return "scored_relevance"
+
+    def _evaluate_strings(
+        self,
+        prediction: str,
+        input: Optional[str] = None,
+        reference: Optional[str] = None,
+        **kwargs: Any
+    ) -> dict:
+        evaluator_result = self.eval_chain(
+            dict(input=input, prediction=prediction), **kwargs
+        )
+        # The prompt asks for reasoning first and the score at the end, so take
+        # the first line as reasoning and search the remainder for a number.
+        reasoning, score_text = evaluator_result["text"].split("\n", maxsplit=1)
+        match = re.search(r"\d+", score_text)
+        score = float(match.group(0)) / 100.0 if match else None
+        return {"score": score, "reasoning": reasoning.strip()}
+
 # -- Get GPT response
 def get_gpt_response(transcription_path, query):
     template = """Eres un asistente. Tu misión es proporcionar respuestas precisas a preguntas relacionadas con la transcripción de una entrevista de YouTube.
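Since RelevanceEvaluator subclasses LangChain's StringEvaluator, it can be exercised on its own through the public evaluate_strings entry point, which dispatches to _evaluate_strings. A quick smoke test; a sketch in which the strings are made-up examples, and OPENAI_API_KEY must be set because __init__ builds a ChatOpenAI instance:

# Hypothetical standalone check of the evaluator; the example strings are invented.
evaluator = RelevanceEvaluator()
result = evaluator.evaluate_strings(
    prediction="La entrevista trata sobre la carrera del invitado.",
    input="¿De qué trata la entrevista?",
)
print(result["score"])      # float in [0, 1], or None if no number was parsed
print(result["reasoning"])  # first line of the model's justification
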
@@ -145,7 +196,29 @@ def get_gpt_response(transcription_path, query):
         | llm
         | StrOutputParser()
     )
-
+    llm_output = rag_chain.invoke(query)
+    dataset = client.create_dataset(dataset_name="Sample LLM dataset", description="A dataset with LLM inputs and outputs", data_type="llm")
+
+    client.create_example(
+        inputs={"input": query},
+        outputs={"output": llm_output},
+        dataset_id=dataset.id,
+    )
+
+    # -- Run custom evaluator
+    evaluation_config = RunEvalConfig(
+        custom_evaluators=[RelevanceEvaluator()],
+    )
+    eval_output = run_on_dataset(
+        dataset_name="Sample LLM dataset",
+        llm_or_chain_factory=rag_chain,
+        evaluation=evaluation_config,
+        client=client,
+    )
+    print("Eval output!!!!")
+    print(eval_output)
+
+    return llm_output
 
 # -- Python function to set up basic features: SpaCy pipeline and LLM model
 @st.cache_resource
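Two caveats about this block. First, client.create_dataset raises if a dataset with that name already exists, so the second call to get_gpt_response in a running Space would fail; a get-or-create guard avoids that. Second, run_on_dataset re-runs rag_chain over every stored example on each call, inside the user-facing request path, so latency and cost grow with the dataset. A sketch of the guard, assuming the has_dataset/read_dataset methods of the langsmith Client:

# Sketch: reuse the dataset when it already exists instead of raising on
# the second call. has_dataset/read_dataset are assumed langsmith Client methods.
dataset_name = "Sample LLM dataset"
if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
else:
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="A dataset with LLM inputs and outputs",
        data_type="llm",
    )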