# Hugging Face Space: "Chat with WizardLM" (scraped page-header artifacts removed)
import gradio as gr
import os

# HACK: installing a dependency at runtime via a shell command is fragile and
# slow on every cold start; prefer pinning llama-cpp-python (with the OpenBLAS
# build flags) in requirements.txt. Kept here because the Space relies on it.
os.system('CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python')

import wget
from llama_cpp import Llama  # imported after the runtime install above
import random

# Download the 2-bit quantized WizardLM-7B GGML weights from Hugging Face.
# NOTE(review): wget.download re-downloads on every start — TODO: skip if the
# file already exists locally.
url = 'https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML/resolve/main/WizardLM-7B-uncensored.ggmlv3.q2_K.bin'
filename = wget.download(url)

# Random seed so each process produces different sampling sequences.
llm2 = Llama(model_path=filename, seed=random.randint(1, 2**31))
# Soft Gradio theme with a custom red primary palette (11 stops, light→dark).
theme = gr.themes.Soft(
    primary_hue=gr.themes.Color(
        "#ededed", "#fee2e2", "#fecaca", "#fca5a5", "#f87171", "#ef4444",
        "#dc2626", "#b91c1c", "#991b1b", "#7f1d1d", "#6c1e1e",
    ),
    neutral_hue="red",
)

# Page heading rendered as raw HTML at the top of the app.
title = """<h1 align="center">Chat with awesome WizardLM model!</h1><br>"""
with gr.Blocks(theme=theme) as demo:
    gr.HTML(title)
    gr.HTML("This model is awesome for its size! It's 20 times smaller than ChatGPT but seems to be very smart. However, this model like all models, can output factually incorrect information. Please do not rely on it for high stakes decisions.")
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def user(user_message, history):
        """Append the submitted message to the chat history.

        Returns an update that clears the textbox, plus the history with a
        new [user_message, None] pair; `bot` fills in the None reply.
        """
        return gr.update(value="", interactive=True), history + [[user_message, None]]

    def bot(history):
        """Generator: stream the model's reply for the latest message.

        Yields the updated history after each generated token so the
        Chatbot component renders the reply incrementally.
        """
        user_message = history[-1][0]
        tokens5 = llm2.tokenize(user_message.encode())
        token4 = llm2.tokenize(b"\n\n### Response:")
        # BUG FIX: `tokens` was undefined — the assignment below had been
        # commented out, so generate() raised a NameError on every message.
        tokens = tokens5 + token4
        history[-1][1] = ""
        count = 0
        output = ""
        for token in llm2.generate(tokens, top_k=50, top_p=0.73, temp=0.72, repeat_penalty=1.1):
            text = llm2.detokenize([token])
            output += text.decode()
            count += 1
            # Cap the reply at 500 tokens and stop on end-of-sequence.
            if count >= 500 or (token == llm2.token_eos()):
                break
            history[-1][1] += text.decode()
            yield history

    # Submit pipeline: record the user turn, then stream the bot reply.
    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # Re-enable the textbox once streaming finishes.
    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
    gr.HTML("Thanks for checking out this app!")

demo.queue()
demo.launch(debug=True)