AlbertoFH98 committed on
Commit
3a8d578
·
1 Parent(s): eb07d82

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +41 -1
utils.py CHANGED
@@ -2,22 +2,30 @@
2
  # -- Libraries
3
  from typing import Any, Dict, List, Mapping, Optional
4
  from pydantic import Extra, Field, root_validator
 
5
  from langchain.llms.base import LLM
 
 
 
6
  from langchain.utils import get_from_dict_or_env
7
  from langchain.vectorstores import Chroma
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
  from langchain.chains import RetrievalQA
10
  from langchain.document_loaders import TextLoader
11
- from langchain.embeddings import HuggingFaceEmbeddings
12
  from googletrans import Translator
13
  import streamlit as st
14
  import together
15
  import textwrap
 
16
  import spacy
17
  import os
18
  import re
19
 
20
  os.environ["TOGETHER_API_KEY"] = "6101599d6e33e3bda336b8d007ca22e35a64c72cfd52c2d8197f663389fc50c5"
 
 
 
21
 
22
  # -- LLM class
23
  class TogetherLLM(LLM):
@@ -108,6 +116,38 @@ PREGUNTA:""", cleaned_prompt, re.DOTALL)
108
  text = self.clean_duplicates(text)
109
  return text, new_cleaned_prompt
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  # -- Python function to set up basic features: translator, SpaCy pipeline and LLM model
112
  @st.cache_resource
113
  def setup_app(transcription_path, emb_model, model, _logger):
 
2
  # -- Libraries
3
  from typing import Any, Dict, List, Mapping, Optional
4
  from pydantic import Extra, Field, root_validator
5
+ from langchain_core.runnables import RunnablePassthrough
6
  from langchain.llms.base import LLM
7
+ from langchain.chat_models import ChatOpenAI
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain.schema import StrOutputParser
10
  from langchain.utils import get_from_dict_or_env
11
  from langchain.vectorstores import Chroma
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain.chains import RetrievalQA
14
  from langchain.document_loaders import TextLoader
15
+ from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings
16
  from googletrans import Translator
17
  import streamlit as st
18
  import together
19
  import textwrap
20
+ import getpass
21
  import spacy
22
  import os
23
  import re
24
 
25
# -- Credentials
# API keys are deliberately NOT stored in source control. Provide
# TOGETHER_API_KEY, OPENAI_API_KEY and (optionally) LANGCHAIN_API_KEY through
# the process environment (for example the hosting platform's secret settings)
# before this module is imported. Any key that was previously committed here
# must be treated as leaked and rotated.
#
# LangSmith tracing is switched on only when its key is already configured:
# prompting with getpass.getpass() at import time would stall (or fail) in a
# non-interactive Streamlit server that has no terminal attached.
os.environ["LANGCHAIN_TRACING_V2"] = (
    "true" if os.environ.get("LANGCHAIN_API_KEY") else "false"
)
29
 
30
  # -- LLM class
31
  class TogetherLLM(LLM):
 
116
  text = self.clean_duplicates(text)
117
  return text, new_cleaned_prompt
118
 
119
+ # -- Get GPT response
120
+ def get_gpt_response(query):
121
+ template = """Eres un asistente. Su misión es proporcionar respuestas precisas a preguntas relacionadas con la transcripción de una entrevista de YouTube.
122
+ No saludes en tu respuesta. No repita la pregunta en su respuesta. Sea conciso y omita las exenciones de responsabilidad o los mensajes predeterminados.
123
+ Solo responda la pregunta, no agregue texto adicional. No des tu opinión personal ni tu conclusión personal. No haga conjeturas ni suposiciones.
124
+ Si no sabe la respuesta de la pregunta o el contexto está vacío, responda cortésmente por qué no sabe la respuesta. Por favor no comparta información falsa.
125
+ {context}
126
+ Pregunta: {question}
127
+ Respuesta:"""
128
+
129
+ rag_prompt_custom = PromptTemplate.from_template(template)
130
+ docs = loader.load()
131
+
132
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
133
+ splits = text_splitter.split_documents(docs)
134
+
135
+ vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
136
+ retriever = vectorstore.as_retriever()
137
+
138
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
139
+
140
+ def format_docs(docs):
141
+ return "\n\n".join(doc.page_content for doc in docs)
142
+
143
+ rag_chain = (
144
+ {"context": retriever | format_docs, "question": RunnablePassthrough()}
145
+ | rag_prompt_custom
146
+ | llm
147
+ | StrOutputParser()
148
+ )
149
+ return rag_chain.invoke(query)
150
+
151
  # -- Python function to set up basic features: translator, SpaCy pipeline and LLM model
152
  @st.cache_resource
153
  def setup_app(transcription_path, emb_model, model, _logger):