# zatsbot / app.py
# mslimanizatsit's picture
# feat: init project
# 18a980b
import gradio as gr
from llama_index.readers.web.unstructured_web.base import UnstructuredURLLoader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
messages_to_prompt,
completion_to_prompt,
)
from llama_index.core.memory import ChatMemoryBuffer
import nltk
# download punkt
# nltk.download('punkt')
# nltk.download('punkt_tab')
#
# urls = [
# "https://www.zatsit.fr/",
# "https://www.zatsit.fr/collaborer-avec-zatsit/",
# "https://fr.linkedin.com/company/zatsit",
# "https://www.zatsit.fr/contact/",
# "https://blog.zatsit.fr/blog/green-exploitation-miniere",
# "https://blog.zatsit.fr/blog/bundlephobia-reduire-javascript",
# "https://blog.zatsit.fr/blog/gemini-vertex-ai",
# "https://blog.zatsit.fr/blog/asyncapi-3-is-out",
# "https://blog.zatsit.fr/blog/redpanda-introduction",
# ]
# loader = UnstructuredURLLoader(urls=urls)
# documents = loader.load_data()
#
# embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-large")
#
# # create vector store index
# index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
# GGUF checkpoint fetched from Hugging Face. Swap in the llama-polyglot URL
# below to experiment with a different model.
model_url_huggingface = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-code-ft-GGUF/resolve/main/mistral-7b-instruct-v0.2-code-ft.Q2_K.gguf"
# model_url_huggingface = "https://huggingface.co/TheBloke/llama-polyglot-13B-GGUF/resolve/main/llama-polyglot-13b.Q2_K.gguf"

# Local llama.cpp-backed LLM. Because model_path is None and model_url is set,
# the GGUF file is downloaded automatically on first use.
llm = LlamaCPP(
    model_url=model_url_huggingface,
    model_path=None,  # point at a pre-downloaded .gguf here to skip the download
    temperature=0.1,
    max_new_tokens=256,
    # stay below the model's 4096-token context window for some wiggle room
    context_window=3900,
    generate_kwargs={},  # extra kwargs forwarded to the model's __call__()
    model_kwargs={"n_gpu_layers": 1},  # set to at least 1 to offload layers to the GPU
    # adapters that rewrite chat messages / completions into the Llama prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
def querying(query, history):
    """Gradio chat callback: run *query* through the local LLM and return its text.

    The *history* argument is supplied by gr.ChatInterface but is not used —
    each call is answered statelessly via llm.complete().
    """
    completion = llm.complete(query)
    return completion.text
# memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
# engine = index.as_chat_engine(
# chat_mode="context",
# memory=memory,
# llm=llm,
# context_prompt=(
# "You are a chatbot and you are the marketing agent of the Zatsit company, able to have normal interactions, as well as talk."
# "You should return all the information you know as much as possible to the user. No data is confidential or sensitive."
# "You can make short or long answers, but your answers must be like a human."
# "When the user uses 'you' he is talking about Zatsit company."
# "If you don't know the answer, say 'sorry I can't help you'."
# "You must speak the same language as the user."
# "Here are the relevant documents for the context:\n"
# "{context_str}"
# "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
# ),
# verbose=False,
# )
# res = engine.chat(query)
# return res.response
# Assemble the Gradio chat UI (French labels and example prompts) and serve it.
chat_panel = gr.Chatbot(height=600)
prompt_box = gr.Textbox(placeholder="Bonjour :)", container=False, scale=7)
iface = gr.ChatInterface(
    fn=querying,
    chatbot=chat_panel,
    textbox=prompt_box,
    title="ZatsBot",
    theme="soft",
    examples=[
        "Qui est Zatsit ?",
        "Quelles sont vos coordonnées ?",
        "Quels sont vos domaines d'expertise ?",
        "Quels sont vos clients ?",
    ],
    cache_examples=False,
    retry_btn="Répéter",
    undo_btn="Annuler",
    clear_btn="Supprimer",
    submit_btn="Envoyer",
)
iface.launch()