# --- Hugging Face Hub page residue (scrape artifact, not program code) ---
# Spaces: Sleeping
# File size: 2,356 Bytes
# commit hashes: 5efa561 8fd3cf8 5efa561 77cf48c 5efa561 9c761bc 5efa561 9c761bc 5efa561 77cf48c 5efa561
# (line-number gutter from the scraped page removed from executable scope)
import gradio as gr
import os
os.system('CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python')
import wget
from llama_cpp import Llama
import random
url = 'https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML/resolve/main/WizardLM-7B-uncensored.ggmlv3.q2_K.bin'
filename = wget.download(url)
llm2 = Llama(model_path=filename, seed=random.randint(1, 2**31))
theme = gr.themes.Soft(
primary_hue=gr.themes.Color("#ededed", "#fee2e2", "#fecaca", "#fca5a5", "#f87171", "#ef4444", "#dc2626", "#b91c1c", "#991b1b", "#7f1d1d", "#6c1e1e"),
neutral_hue="red",
)
title = """<h1 align="center">Chat with awesome WizardLM model!</h1><br>"""
with gr.Blocks(theme=theme) as demo:
gr.HTML(title)
gr.HTML("This model is awesome for its size! It's 20 times smaller than ChatGPT but seems to be very smart. However, this model like all models, can output factually incorrect information. Please do not rely on it for high stakes decisions.")
chatbot = gr.Chatbot()
msg = gr.Textbox()
clear = gr.ClearButton([msg, chatbot])
#instruction = gr.Textbox(label="Instruction", placeholder=)
def user(user_message, history):
return gr.update(value="", interactive=True), history + [[user_message, None]]
def bot(history):
#instruction = history[-1][1] or ""
user_message = history[-1][0]
#token1 = llm.tokenize(b"### Instruction: ")
#token2 = llm.tokenize(instruction.encode())
#token3 = llm2.tokenize(b"USER: ")
tokens5 = llm2.tokenize(user_message.encode())
token4 = llm2.tokenize(b"\n\n### Response:")
#tokens = tokens5 + token4
history[-1][1] = ""
count = 0
output = ""
for token in llm2.generate(tokens, top_k=50, top_p=0.73, temp=0.72, repeat_penalty=1.1):
text = llm2.detokenize([token])
output += text.decode()
count += 1
if count >= 500 or (token == llm2.token_eos()):
break
history[-1][1] += text.decode()
yield history
response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
bot, chatbot, chatbot
)
response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
gr.HTML("Thanks for checking out this app!")
demo.queue()
demo.launch(debug=True)
# (trailing scrape artifact)