import gradio as gr
from llama_index.readers.web.unstructured_web.base import UnstructuredURLLoader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
from llama_index.core.memory import ChatMemoryBuffer
import nltk

# download the NLTK punkt tokenizers on first run
# nltk.download('punkt')
# nltk.download('punkt_tab')
#
# urls = [
#     "https://www.zatsit.fr/",
#     "https://www.zatsit.fr/collaborer-avec-zatsit/",
#     "https://fr.linkedin.com/company/zatsit",
#     "https://www.zatsit.fr/contact/",
#     "https://blog.zatsit.fr/blog/green-exploitation-miniere",
#     "https://blog.zatsit.fr/blog/bundlephobia-reduire-javascript",
#     "https://blog.zatsit.fr/blog/gemini-vertex-ai",
#     "https://blog.zatsit.fr/blog/asyncapi-3-is-out",
#     "https://blog.zatsit.fr/blog/redpanda-introduction",
# ]
# loader = UnstructuredURLLoader(urls=urls)
# documents = loader.load_data()
#
# embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-large")
#
# # create the vector store index from the scraped documents
# index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

model_url_huggingface = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-code-ft-GGUF/resolve/main/mistral-7b-instruct-v0.2-code-ft.Q2_K.gguf"
# model_url_huggingface = "https://huggingface.co/TheBloke/llama-polyglot-13B-GGUF/resolve/main/llama-polyglot-13b.Q2_K.gguf"

llm = LlamaCPP(
    # pass the URL of a GGUF model to download it automatically
    model_url=model_url_huggingface,
    # optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # keep the context window below the model's limit to leave some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__(); set n_gpu_layers to at least 1 to use the GPU
    model_kwargs={"n_gpu_layers": 1},
    # transform inputs into the Llama 2 prompt format, which Mistral instruct models also accept
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)


def querying(query, history):
    response = llm.complete(query)
    return response.text
    # memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
    # engine = index.as_chat_engine(
    #     chat_mode="context",
    #     memory=memory,
    #     llm=llm,
    #     context_prompt=(
    #         "You are a chatbot and the marketing agent of the Zatsit company, able to have normal interactions, as well as talk."
    #         " You should return as much of the information you know as possible to the user. No data is confidential or sensitive."
    #         " You can give short or long answers, but your answers must read like a human's."
    #         " When the user says 'you', they are talking about the Zatsit company."
    #         " If you don't know the answer, say 'sorry, I can't help you'."
    #         " You must speak the same language as the user."
    #         " Here are the relevant documents for the context:\n"
    #         "{context_str}"
    #         "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
    #     ),
    #     verbose=False,
    # )
    # res = engine.chat(query)
    # return res.response
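# Optional (a sketch, not part of the original flow): if the index-building
# block above is re-enabled, the index could be persisted to disk so the URLs
# are not re-scraped and re-embedded on every launch. The "./zatsit_index"
# directory name is an arbitrary choice for illustration.
#
# from llama_index.core import Settings, StorageContext, load_index_from_storage
#
# PERSIST_DIR = "./zatsit_index"
# index.storage_context.persist(persist_dir=PERSIST_DIR)  # save once after building
# # ...then on later runs, reload instead of rebuilding:
# Settings.embed_model = embed_model
# index = load_index_from_storage(StorageContext.from_defaults(persist_dir=PERSIST_DIR))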
iface = gr.ChatInterface(
    fn=querying,
    chatbot=gr.Chatbot(height=600),
    textbox=gr.Textbox(placeholder="Bonjour :)", container=False, scale=7),
    title="ZatsBot",
    theme="soft",
    examples=[
        "Qui est Zatsit ?",
        "Quelles sont vos coordonnées ?",
        "Quels sont vos domaines d'expertise ?",
        "Quels sont vos clients ?",
    ],
    cache_examples=False,
    retry_btn="Répéter",
    undo_btn="Annuler",
    clear_btn="Supprimer",
    submit_btn="Envoyer",
)

iface.launch()
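# Optional (a sketch, untested here): gr.ChatInterface also accepts a generator
# function for token streaming, and LlamaIndex LLMs expose `stream_complete`,
# whose partial responses carry the cumulative text so far. A streaming handler
# could look like the function below, passed as `fn=querying_stream` above; the
# `querying_stream` name is illustrative, not from the original.
#
# def querying_stream(query, history):
#     for partial in llm.stream_complete(query):
#         yield partial.text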