Files changed:
- README.md           +2  -0
- copywriter.py       +37 -5
- requirements.txt    +5  -1
- search_agent.py     +10 -8
- search_agent_ui.py  +80 -18
- web_crawler.py      +2  -1
- web_rag.py          +76 -37
README.md  (CHANGED)

@@ -10,6 +10,8 @@ pinned: false
 license: apache-2.0
 ---
 
+⚠️ **This project is a demonstration / proof-of-concept and is not intended for use in production environments. It is provided as-is, without warranty or guarantee of any kind. The code and any accompanying materials are for educational, testing, or evaluation purposes only.**⚠️
+
 # Simple Search Agent
 
 This Python project provides a search agent that can perform web searches, optimize search queries, fetch and process web content, and generate responses using a language model and the retrieved information.
copywriter.py  (CHANGED)

@@ -7,7 +7,6 @@ from langchain.prompts.chat import (
 from langchain.prompts.prompt import PromptTemplate
 
 
-
 def get_comments_prompt(query, draft):
     system_message = SystemMessage(
         content="""

@@ -35,14 +34,11 @@ def get_comments_prompt(query, draft):
     )
     return [system_message, human_message]
 
-
 def generate_comments(chat_llm, query, draft, callbacks=[]):
     messages = get_comments_prompt(query, draft)
     response = chat_llm.invoke(messages, config={"callbacks": callbacks})
     return response.content
 
-
-
 def get_final_text_prompt(query, draft, comments):
     system_message = SystemMessage(
         content="""

@@ -74,4 +70,40 @@ def get_final_text_prompt(query, draft, comments):
 def generate_final_text(chat_llm, query, draft, comments, callbacks=[]):
     messages = get_final_text_prompt(query, draft, comments)
     response = chat_llm.invoke(messages, config={"callbacks": callbacks})
-    return response.content
+    return response.content
+
+
+def get_compare_texts_prompts(query, draft_text, final_text):
+    system_message = SystemMessage(
+        content="""
+        I want you to act as a writing quality evaluator.
+        I will provide you with the original user request and four texts.
+        Your task is to carefully analyze, compare the two texts across the following dimensions and grade each text 0 to 10:
+        1. Grammar and spelling - Which text has fewer grammatical errors and spelling mistakes?
+        2. Clarity and coherence - Which text is easier to understand and has a more logical flow of ideas? Evaluate how well each text conveys its main points.
+        3. Tone and style - Which text has a more appropriate and engaging tone and writing style for its intended purpose and audience?
+        4. Sticking to the request - Which text is more successful responding to the original user request. Consider the request, the style, the length, etc.
+        5. Overall effectiveness - Considering the above factors, which text is more successful overall at communicating its message and achieving its goals?
+
+        After comparing the texts on these criteria, clearly state which text you think is better and summarize the main reasons why.
+        Provide specific examples from each text to support your evaluation.
+        """
+    )
+    human_message = HumanMessage(
+        content=f"""
+        Original query: {query}
+        ------------------------
+        Text 1: {draft_text}
+        ------------------------
+        Text 2: {final_text}
+        ------------------------
+        Summary:
+        """
+    )
+    return [system_message, human_message]
+
+
+def compare_text(chat_llm, query, draft, final, callbacks=[]):
+    messages = get_compare_texts_prompts(query, draft_text=draft, final_text=final)
+    response = chat_llm.invoke(messages, config={"callbacks": callbacks})
+    return response.content
requirements.txt  (CHANGED)

@@ -1,5 +1,7 @@
+anthropic
 boto3
 bs4
+chromedriver-py
 cohere
 docopt
 faiss-cpu

@@ -7,7 +9,7 @@ google-api-python-client
 pdfplumber
 python-dotenv
 langchain
-langchain-
+langchain-aws
 langchain-fireworks
 langchain_core
 langchain_community

@@ -18,6 +20,8 @@ langsmith
 schema
 streamlit
 selenium
+tiktoken
+transformers
 rich
 trafilatura
 watchdog
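
langchain-aws backs the new ChatBedrock import in web_rag.py, chromedriver-py pairs with the existing selenium dependency used by get_selenium_driver, and tiktoken / transformers appear to support the local token counting (chat_llm.get_num_tokens) that the reworked build_rag_prompt relies on. A minimal sketch of that counting, assuming an OPENAI_API_KEY is set (no completion request is made):

    # sketch: the per-model token counting used to size the RAG prompt
    from langchain_openai import ChatOpenAI

    chat = ChatOpenAI(model="gpt-3.5-turbo")
    print(chat.get_num_tokens("How many tokens is this sentence?"))  # counted locally via tiktoken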
search_agent.py  (CHANGED)

@@ -8,6 +8,7 @@ Usage:
         [--temperature=temp]
         [--copywrite]
         [--max_pages=num]
+        [--max_extracts=num]
         [--output=text]
         SEARCH_QUERY
     search_agent.py --version

@@ -21,6 +22,7 @@ Options:
     -p provider --provider=provider   Use a specific LLM (choices: bedrock,openai,groq,ollama,cohere,fireworks) [default: openai]
     -m model --model=model            Use a specific model
     -n num --max_pages=num            Max number of pages to retrieve [default: 10]
+    -e num --max_extracts=num         Max number of page extract to consider [default: 5]
     -o text --output=text             Output format (choices: text, markdown) [default: markdown]
 
 """

@@ -63,8 +65,6 @@ def get_selenium_driver():
     driver = webdriver.Chrome(options=chrome_options)
     return driver
 
-
-
 callbacks = []
 if os.getenv("LANGCHAIN_API_KEY"):
     callbacks.append(

@@ -90,14 +90,16 @@ if __name__ == '__main__':
     temperature = float(arguments["--temperature"])
     domain=arguments["--domain"]
     max_pages=arguments["--max_pages"]
+    max_extract=int(arguments["--max_extracts"])
     output=arguments["--output"]
     query = arguments["SEARCH_QUERY"]
 
     chat, embedding_model = wr.get_models(provider, model, temperature)
-    #console.log(f"Using {chat.model_name} on {provider}")
 
     with console.status(f"[bold green]Optimizing query for search: {query}"):
         optimize_search_query = wr.optimize_search_query(chat, query, callbacks=callbacks)
+    if len(optimize_search_query) < 3:
+        optimize_search_query = query
     console.log(f"Optimized search query: [bold blue]{optimize_search_query}")
 
     with console.status(

@@ -112,11 +114,11 @@ if __name__ == '__main__':
         contents = wc.get_links_contents(sources, get_selenium_driver)
     console.log(f"Managed to extract content from {len(contents)} sources")
 
-    with console.status(f"[bold green]
+    with console.status(f"[bold green]Embedding {len(contents)} sources for content", spinner="growVertical"):
         vector_store = wc.vectorize(contents, embedding_model)
 
-    with console.status("[bold green]
-        draft = wr.query_rag(chat, query, optimize_search_query, vector_store, top_k =
+    with console.status("[bold green]Writing content", spinner='dots8Bit'):
+        draft = wr.query_rag(chat, query, optimize_search_query, vector_store, top_k = max_extract, callbacks=callbacks)
 
     console.rule(f"[bold green]Response from {provider}")
     if output == "text":

@@ -129,7 +131,7 @@ if __name__ == '__main__':
     with console.status("[bold green]Getting comments from the reviewer", spinner="dots8Bit"):
         comments = cw.generate_comments(chat, query, draft, callbacks=callbacks)
 
-    console.rule(
+    console.rule("[bold green]Response from reviewer")
     if output == "text":
         console.print(comments)
     else:

@@ -139,7 +141,7 @@ if __name__ == '__main__':
     with console.status("[bold green]Writing the final text", spinner="dots8Bit"):
         final_text = cw.generate_final_text(chat, query, draft, comments, callbacks=callbacks)
 
-    console.rule(
+    console.rule("[bold green]Final text")
     if output == "text":
         console.print(final_text)
     else:
search_agent_ui.py  (CHANGED)

@@ -10,6 +10,7 @@ from langsmith.client import Client
 
 import web_rag as wr
 import web_crawler as wc
+import copywriter as cw
 
 dotenv.load_dotenv()
 

@@ -18,7 +19,6 @@ ls_tracer = LangChainTracer(
     client=Client()
 )
 
-
 class StreamHandler(BaseCallbackHandler):
     """Stream handler that appends tokens to container."""
     def __init__(self, container, initial_text=""):

@@ -28,11 +28,36 @@ class StreamHandler(BaseCallbackHandler):
     def on_llm_new_token(self, token: str, **kwargs):
         self.text += token
         self.container.markdown(self.text)
+
 
+def create_links_markdown(sources_list):
+    """
+    Create a markdown string for each source in the provided JSON.
+
+    Args:
+        sources_list (list): A list of dictionaries representing the sources.
+            Each dictionary should have 'title', 'link', and 'snippet' keys.
+
+    Returns:
+        str: A markdown string with a bullet point for each source,
+            including the title linked to the URL and the snippet.
+    """
+    markdown_list = []
+    for source in sources_list:
+        title = source['title']
+        link = source['link']
+        snippet = source['snippet']
+        markdown = f"- [{title}]({link})\n {snippet}"
+        markdown_list.append(markdown)
+    return "\n".join(markdown_list)
+
+st.set_page_config(layout="wide")
 st.title("🔍 Simple Search Agent 💬")
 
 if "providers" not in st.session_state:
     providers = []
+    if os.getenv("FIREWORKS_API_KEY"):
+        providers.append("fireworks")
     if os.getenv("COHERE_API_KEY"):
         providers.append("cohere")
     if os.getenv("OPENAI_API_KEY"):

@@ -41,22 +66,34 @@ if "providers" not in st.session_state:
         providers.append("groq")
     if os.getenv("OLLAMA_API_KEY"):
         providers.append("ollama")
-    if os.getenv("FIREWORKS_API_KEY"):
-        providers.append("fireworks")
     if os.getenv("CREDENTIALS_PROFILE_NAME"):
         providers.append("bedrock")
     st.session_state["providers"] = providers
 
-with st.sidebar:
-    st.
-
-
-
-
+with st.sidebar.expander("Options", expanded=False):
+    model_provider = st.selectbox("Model provider 🧠", st.session_state["providers"])
+    temperature = st.slider("Model temperature 🌡️", 0.0, 1.0, 0.1, help="The higher the more creative")
+    max_pages = st.slider("Max pages to retrieve 🔍", 1, 20, 15, help="How many web pages to retrive from the internet")
+    top_k_documents = st.slider("Nbr of doc extracts to consider 📄", 1, 20, 5, help="How many of the top extracts to consider")
+    reviewer_mode = st.checkbox("Draft / Comment / Rewrite mode ✍️", value=False, help="First generate a write, then comments and then rewrite")
+
+with st.sidebar.expander("Links", expanded=False):
+    links_md = st.markdown("")
+
+if reviewer_mode:
+    with st.sidebar.expander("Answer review", expanded=False):
+        st.caption("Draft")
+        draft_md = st.markdown("")
+        st.divider()
+        st.caption("Comments")
+        comments_md = st.markdown("")
+        st.divider()
+        st.caption("Comparaison")
+        comparaison_md = st.markdown("")
 
 if "messages" not in st.session_state:
     st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
-
+
 for message in st.session_state.messages:
     st.chat_message(message["role"]).write(message["content"])
     if message["role"] == "assistant" and 'message_id' in message:

@@ -80,6 +117,7 @@ if prompt := st.chat_input("Enter you instructions..." ):
         st.write(f"I should search the web for: {optimize_search_query}")
 
         sources = wc.get_sources(optimize_search_query, max_pages=max_pages)
+        links_md.markdown(create_links_markdown(sources))
 
         st.write(f"I'll now retrieve the {len(sources)} webpages and documents I found")
         contents = wc.get_links_contents(sources)

@@ -87,18 +125,42 @@ if prompt := st.chat_input("Enter you instructions..." ):
         st.write( f"Reading through the {len(contents)} sources I managed to retrieve")
         vector_store = wc.vectorize(contents, embedding_model=embedding_model)
         st.write(f"I collected {vector_store.index.ntotal} chunk of data and I can now answer")
+
+
+        if reviewer_mode:
+            st.write("Creating a draft")
+            draft_prompt = wr.build_rag_prompt(
+                chat, prompt, optimize_search_query,
+                vector_store, top_k=top_k_documents, callbacks=[ls_tracer])
+            draft = chat.invoke(draft_prompt, stream=False, config={ "callbacks": [ls_tracer]})
+            draft_md.markdown(draft.content)
+            st.write("Sending draft for review")
+            comments = cw.generate_comments(chat, prompt, draft, callbacks=[ls_tracer])
+            comments_md.markdown(comments)
+            st.write("Reviewing comments and generating final answer")
+            rag_prompt = cw.get_final_text_prompt(prompt, draft, comments)
+        else:
+            rag_prompt = wr.build_rag_prompt(
+                chat, prompt, optimize_search_query, vector_store,
+                top_k=top_k_documents, callbacks=[ls_tracer]
+            )
 
-    rag_prompt = wr.build_rag_prompt(prompt, optimize_search_query, vector_store, top_k=5, callbacks=[ls_tracer])
     with st.chat_message("assistant"):
         st_cb = StreamHandler(st.empty())
         result = chat.invoke(rag_prompt, stream=True, config={ "callbacks": [st_cb, ls_tracer]})
         response = result.content.strip()
         message_id = f"{prompt}{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
         st.session_state.messages.append({"role": "assistant", "content": response})
-
-
-
-
-
-
-
+
+    if st.session_state.messages[-1]["role"] == "assistant":
+        st.download_button(
+            label="Download",
+            data=st.session_state.messages[-1]["content"],
+            file_name=f"{message_id}.txt",
+            mime="text/plain"
+        )
+
+    if reviewer_mode:
+        compare_prompt = cw.get_compare_texts_prompts(prompt, draft_text=draft, final_text=response)
+        result = chat.invoke(compare_prompt, stream=False, config={ "callbacks": [ls_tracer]})
+        comparaison_md.markdown(result.content)
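
create_links_markdown renders the Brave results as a markdown bullet list inside the new "Links" sidebar expander. A standalone sketch of what it produces for two hypothetical sources (the helper below simply mirrors the one added to search_agent_ui.py):

    # sketch: the markdown produced for two made-up sources
    sources = [
        {"title": "Marie Curie - Wikipedia",
         "link": "https://en.wikipedia.org/wiki/Marie_Curie",
         "snippet": "Polish and naturalised-French physicist and chemist."},
        {"title": "Nobel Prize",
         "link": "https://www.nobelprize.org/",
         "snippet": "Marie Curie won Nobel Prizes in both Physics and Chemistry."},
    ]

    def create_links_markdown(sources_list):
        # mirrors the helper added to search_agent_ui.py
        return "\n".join(
            f"- [{s['title']}]({s['link']})\n {s['snippet']}" for s in sources_list
        )

    print(create_links_markdown(sources))
    # - [Marie Curie - Wikipedia](https://en.wikipedia.org/wiki/Marie_Curie)
    #  Polish and naturalised-French physicist and chemist.
    # - [Nobel Prize](https://www.nobelprize.org/)
    #  Marie Curie won Nobel Prizes in both Physics and Chemistry.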
web_crawler.py  (CHANGED)

@@ -35,12 +35,13 @@ def get_sources(query, max_pages=10, domain=None):
         json_response = response.json()
 
         if 'web' not in json_response or 'results' not in json_response['web']:
+            print(response.text)
             raise Exception('Invalid API response format')
 
         final_results = [{
             'title': result['title'],
             'link': result['url'],
-            'snippet': result['description'],
+            'snippet': extract(result['description'], output_format='txt', include_tables=False, include_images=False, include_formatting=True),
             'favicon': result.get('profile', {}).get('img', '')
         } for result in json_response['web']['results']]
 
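
The 'snippet' field is now passed through extract(...), presumably trafilatura's extract (trafilatura is already in requirements.txt), so the HTML markup Brave returns in the description is stripped down to plain text. A minimal sketch under that assumption:

    # sketch: cleaning a result description the way the new 'snippet' line does;
    # assumes `extract` is trafilatura.extract
    from trafilatura import extract

    description = ("<p>The <strong>solar system</strong> is the gravitationally bound system "
                   "of the Sun and the objects that orbit it, including the eight planets.</p>")
    snippet = extract(description, output_format='txt', include_tables=False,
                      include_images=False, include_formatting=True)
    print(snippet)  # cleaned plain text, or None if trafilatura deems the fragment too short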
web_rag.py  (CHANGED)

@@ -28,13 +28,14 @@ from langchain.prompts.chat import (
 from langchain.prompts.prompt import PromptTemplate
 from langchain.retrievers.multi_query import MultiQueryRetriever
 
+from langchain_aws import ChatBedrock
 from langchain_cohere.chat_models import ChatCohere
 from langchain_cohere.embeddings import CohereEmbeddings
 from langchain_fireworks.chat_models import ChatFireworks
-from langchain_groq import ChatGroq
+#from langchain_groq import ChatGroq
+from langchain_groq.chat_models import ChatGroq
 from langchain_openai import ChatOpenAI
 from langchain_openai.embeddings import OpenAIEmbeddings
-from langchain_community.chat_models.bedrock import BedrockChat
 from langchain_community.embeddings.bedrock import BedrockEmbeddings
 from langchain_community.chat_models.ollama import ChatOllama
 

@@ -44,15 +45,15 @@ def get_models(provider, model=None, temperature=0.0):
             credentials_profile_name=os.getenv('CREDENTIALS_PROFILE_NAME')
             if model is None:
                 model = "anthropic.claude-3-sonnet-20240229-v1:0"
-            chat_llm =
+            chat_llm = ChatBedrock(
                 credentials_profile_name=credentials_profile_name,
                 model_id=model,
-                model_kwargs={"temperature": temperature,
+                model_kwargs={"temperature": temperature, "max_tokens":4096 },
+            )
+            embedding_model = BedrockEmbeddings(
+                model_id='cohere.embed-multilingual-v3',
+                credentials_profile_name=credentials_profile_name
             )
-            #embedding_model = BedrockEmbeddings(
-            #    model_id='cohere.embed-multilingual-v3',
-            #    credentials_profile_name=credentials_profile_name
-            #)
             embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
         case 'openai':
             if model is None:

@@ -73,14 +74,17 @@ def get_models(provider, model=None, temperature=0.0):
             if model is None:
                 model = 'command-r-plus'
             chat_llm = ChatCohere(model=model, temperature=temperature)
-            embedding_model = CohereEmbeddings(model="embed-english-light-v3.0")
+            #embedding_model = CohereEmbeddings(model="embed-english-light-v3.0")
+            embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
         case 'fireworks':
             if model is None:
-                model = 'accounts/fireworks/models/
-
+                #model = 'accounts/fireworks/models/dbrx-instruct'
+                model = 'accounts/fireworks/models/llama-v3-70b-instruct'
+            chat_llm = ChatFireworks(model_name=model, temperature=temperature, max_tokens=8192)
             embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
         case _:
             raise ValueError(f"Unknown LLM provider {provider}")
+
     return chat_llm, embedding_model
 
 

@@ -96,12 +100,13 @@ def get_optimized_search_messages(query):
     """
     system_message = SystemMessage(
         content="""
-        I want you to act as a prompt optimizer for web search.
+        I want you to act as a prompt optimizer for web search.
+        I will provide you with a chat prompt, and your goal is to optimize it into a search string that will yield the most relevant and useful information from a search engine like Google.
         To optimize the prompt:
-        Identify the key information being requested
-        Arrange the keywords into a concise search string
-        Keep it short, around 1 to 5 words total
-        Put the most important keywords first
+        - Identify the key information being requested
+        - Arrange the keywords into a concise search string
+        - Keep it short, around 1 to 5 words total
+        - Put the most important keywords first
 
         Some tips and things to be sure to remove:
         - Remove any conversational or instructional phrases

@@ -110,44 +115,44 @@ def get_optimized_search_messages(query):
         - Remove style instructions (exmaple: "in the style of", engaging, short, long)
         - Remove lenght instruction (example: essay, article, letter, etc)
 
-
+        You should answer only with the optimized search query and add "**" to the end of the search string to indicate the end of the query
 
         Example:
         Question: How do I bake chocolate chip cookies from scratch?
-
+        chocolate chip cookies recipe from scratch**
         Example:
         Question: I would like you to show me a timeline of Marie Curie's life. Show results as a markdown table
-
+        Marie Curie timeline**
        Example:
         Question: I would like you to write a long article on NATO vs Russia. Use known geopolitical frameworks.
-
+        geopolitics nato russia**
         Example:
         Question: Write an engaging LinkedIn post about Andrew Ng
-
+        Andrew Ng**
         Example:
         Question: Write a short article about the solar system in the style of Carl Sagan
-
+        solar system**
         Example:
         Question: Should I use Kubernetes? Answer in the style of Gilfoyle from the TV show Silicon Valley
-
+        Kubernetes decision**
         Example:
         Question: Biography of Napoleon. Include a table with the major events.
-
+        napoleon biography events**
         Example:
         Question: Write a short article on the history of the United States. Include a table with the major events.
-
+        united states history events**
         Example:
         Question: Write a short article about the solar system in the style of donald trump
-
+        solar system**
         Exmaple:
         Question: Write a short linkedin about how the "freakeconomics" book previsions didn't pan out
-
+        freakeconomics book predictions failed**
         """
     )
     human_message = HumanMessage(
         content=f"""
         Question: {query}
-
+
         """
     )
     return [system_message, human_message]

@@ -230,15 +235,49 @@ def multi_query_rag(chat_llm, question, search_query, vectorstore, callbacks = [
     response = chat_llm.invoke(prompt, config={"callbacks": callbacks})
     return response.content
 
-
-
-
-
-
-
-
+def get_context_size(chat_llm):
+    if isinstance(chat_llm, ChatOpenAI):
+        if chat_llm.model_name.startswith("gpt-4"):
+            return 128000
+        else:
+            return 16385
+    if isinstance(chat_llm, ChatFireworks):
+        return 8192
+    if isinstance(chat_llm, ChatGroq):
+        return 37862
+    if isinstance(chat_llm, ChatOllama):
+        return 8192
+    if isinstance(chat_llm, ChatCohere):
+        return 128000
+    if isinstance(chat_llm, ChatBedrock):
+        if chat_llm.model_id.startswith("anthropic.claude-3"):
+            return 200000
+        if chat_llm.model_id.startswith("anthropic.claude"):
+            return 100000
+        if chat_llm.model_id.startswith("mistral"):
+            if chat_llm.model_id.startswith("mistral.mixtral-8x7b"):
+                return 4096
+            else:
+                return 8192
+    return 4096
+
+
+def build_rag_prompt(chat_llm, question, search_query, vectorstore, top_k = 10, callbacks = []):
+    done = False
+    while not done:
+        unique_docs = vectorstore.similarity_search(
+            search_query, k=top_k, callbacks=callbacks, verbose=True)
+        context = format_docs(unique_docs)
+        prompt = get_rag_prompt_template().format(query=question, context=context)
+        nbr_tokens = chat_llm.get_num_tokens(prompt)
+        if top_k <= 1 or nbr_tokens <= get_context_size(chat_llm) - 768:
+            done = True
+        else:
+            top_k = int(top_k * 0.75)
+
+    return prompt
 
 def query_rag(chat_llm, question, search_query, vectorstore, top_k = 10, callbacks = []):
-    prompt = build_rag_prompt(question, search_query, vectorstore, top_k=
+    prompt = build_rag_prompt(chat_llm, question, search_query, vectorstore, top_k=top_k, callbacks = callbacks)
     response = chat_llm.invoke(prompt, config={"callbacks": callbacks})
-    return response.content
+    return response.content