import gradio as gr
from llama_index.readers.web.unstructured_web.base import UnstructuredURLLoader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
from llama_index.core.memory import ChatMemoryBuffer
import nltk

# download the NLTK punkt tokenizers on first run
# nltk.download('punkt')
# nltk.download('punkt_tab')
#
# urls = [
#     "https://www.zatsit.fr/",
#     "https://www.zatsit.fr/collaborer-avec-zatsit/",
#     "https://fr.linkedin.com/company/zatsit",
#     "https://www.zatsit.fr/contact/",
#     "https://blog.zatsit.fr/blog/green-exploitation-miniere",
#     "https://blog.zatsit.fr/blog/bundlephobia-reduire-javascript",
#     "https://blog.zatsit.fr/blog/gemini-vertex-ai",
#     "https://blog.zatsit.fr/blog/asyncapi-3-is-out",
#     "https://blog.zatsit.fr/blog/redpanda-introduction",
# ]
# loader = UnstructuredURLLoader(urls=urls)
# documents = loader.load_data()
#
# embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-large")
#
# # create the vector store index from the scraped documents
# index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

model_url_huggingface = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-code-ft-GGUF/resolve/main/mistral-7b-instruct-v0.2-code-ft.Q2_K.gguf"
# model_url_huggingface = "https://huggingface.co/TheBloke/llama-polyglot-13B-GGUF/resolve/main/llama-polyglot-13b.Q2_K.gguf"

llm = LlamaCPP(
    # pass the URL of a GGUF model to download it automatically
    model_url=model_url_huggingface,
    # optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # keep the context window below the model's limit to leave some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__(); set n_gpu_layers to at least 1 to use the GPU
    model_kwargs={"n_gpu_layers": 1},
    # transform inputs into the Llama 2 prompt format, which Mistral instruct models also accept
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)


def querying(query, history):
    response = llm.complete(query)
    return response.text
    # memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
    # engine = index.as_chat_engine(
    #     chat_mode="context",
    #     memory=memory,
    #     llm=llm,
    #     context_prompt=(
    #         "You are a chatbot and the marketing agent of the Zatsit company, able to have normal interactions, as well as talk."
    #         " You should return as much of the information you know as possible to the user. No data is confidential or sensitive."
    #         " You can give short or long answers, but your answers must read like a human's."
    #         " When the user says 'you', they are talking about the Zatsit company."
    #         " If you don't know the answer, say 'sorry, I can't help you'."
    #         " You must speak the same language as the user."
    #         " Here are the relevant documents for the context:\n"
    #         "{context_str}"
    #         "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
    #     ),
    #     verbose=False,
    # )
    # res = engine.chat(query)
    # return res.response
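# Optional (a sketch, not part of the original flow): if the index-building
# block above is re-enabled, the index could be persisted to disk so the URLs
# are not re-scraped and re-embedded on every launch. The "./zatsit_index"
# directory name is an arbitrary choice for illustration.
#
# from llama_index.core import Settings, StorageContext, load_index_from_storage
#
# PERSIST_DIR = "./zatsit_index"
# index.storage_context.persist(persist_dir=PERSIST_DIR)  # save once after building
# # ...then on later runs, reload instead of rebuilding:
# Settings.embed_model = embed_model
# index = load_index_from_storage(StorageContext.from_defaults(persist_dir=PERSIST_DIR))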
iface = gr.ChatInterface(
    fn=querying,
    chatbot=gr.Chatbot(height=600),
    textbox=gr.Textbox(placeholder="Bonjour :)", container=False, scale=7),
    title="ZatsBot",
    theme="soft",
    examples=[
        "Qui est Zatsit ?",
        "Quelles sont vos coordonnées ?",
        "Quels sont vos domaines d'expertise ?",
        "Quels sont vos clients ?",
    ],
    cache_examples=False,
    retry_btn="Répéter",
    undo_btn="Annuler",
    clear_btn="Supprimer",
    submit_btn="Envoyer",
)

iface.launch()
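# Optional (a sketch, untested here): gr.ChatInterface also accepts a generator
# function for token streaming, and LlamaIndex LLMs expose `stream_complete`,
# whose partial responses carry the cumulative text so far. A streaming handler
# could look like the function below, passed as `fn=querying_stream` above; the
# `querying_stream` name is illustrative, not from the original.
#
# def querying_stream(query, history):
#     for partial in llm.stream_complete(query):
#         yield partial.text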