# Hugging Face Space: "Chat with WizardLM" (scraped page-header artifacts removed)
import gradio as gr
import os

# HACK: installing a dependency at runtime via a shell command is fragile and
# slow on every cold start; prefer pinning llama-cpp-python (with the OpenBLAS
# build flags) in requirements.txt. Kept here because the Space relies on it.
os.system('CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python')

import wget
from llama_cpp import Llama  # imported after the runtime install above
import random

# Download the 2-bit quantized WizardLM-7B GGML weights from Hugging Face.
# NOTE(review): wget.download re-downloads on every start — TODO: skip if the
# file already exists locally.
url = 'https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML/resolve/main/WizardLM-7B-uncensored.ggmlv3.q2_K.bin'
filename = wget.download(url)

# Random seed so each process produces different sampling sequences.
llm2 = Llama(model_path=filename, seed=random.randint(1, 2**31))
# Soft Gradio theme with a custom red primary palette (11 stops, light→dark).
theme = gr.themes.Soft(
    primary_hue=gr.themes.Color(
        "#ededed", "#fee2e2", "#fecaca", "#fca5a5", "#f87171", "#ef4444",
        "#dc2626", "#b91c1c", "#991b1b", "#7f1d1d", "#6c1e1e",
    ),
    neutral_hue="red",
)

# Page heading rendered as raw HTML at the top of the app.
title = """<h1 align="center">Chat with awesome WizardLM model!</h1><br>"""
with gr.Blocks(theme=theme) as demo:
    gr.HTML(title)
    gr.HTML("This model is awesome for its size! It's 20 times smaller than ChatGPT but seems to be very smart. However, this model like all models, can output factually incorrect information. Please do not rely on it for high stakes decisions.")
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def user(user_message, history):
        """Append the submitted message to the chat history.

        Returns an update that clears the textbox, plus the history with a
        new [user_message, None] pair; `bot` fills in the None reply.
        """
        return gr.update(value="", interactive=True), history + [[user_message, None]]

    def bot(history):
        """Generator: stream the model's reply for the latest message.

        Yields the updated history after each generated token so the
        Chatbot component renders the reply incrementally.
        """
        user_message = history[-1][0]
        tokens5 = llm2.tokenize(user_message.encode())
        token4 = llm2.tokenize(b"\n\n### Response:")
        # BUG FIX: `tokens` was undefined — the assignment below had been
        # commented out, so generate() raised a NameError on every message.
        tokens = tokens5 + token4
        history[-1][1] = ""
        count = 0
        output = ""
        for token in llm2.generate(tokens, top_k=50, top_p=0.73, temp=0.72, repeat_penalty=1.1):
            text = llm2.detokenize([token])
            output += text.decode()
            count += 1
            # Cap the reply at 500 tokens and stop on end-of-sequence.
            if count >= 500 or (token == llm2.token_eos()):
                break
            history[-1][1] += text.decode()
            yield history

    # Submit pipeline: record the user turn, then stream the bot reply.
    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # Re-enable the textbox once streaming finishes.
    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
    gr.HTML("Thanks for checking out this app!")

demo.queue()
demo.launch(debug=True)