maknee committed on
Commit
8ce09d2
·
1 Parent(s): 19ad80d
Files changed (2) hide show
  1. app.py +101 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from kani import Kani
2
+ from kani.engines.ctransformers.llama2 import LlamaCTransformersEngine
3
+
4
+ import gradio as gr
5
+
6
# HTML snippets rendered by gr.Markdown components in start().
title = """<h1 align="center">kani (カニ) demo</h1>"""
description = """<h3>This is the kani chat demo with llama v2 ggml (cpu only!)</h3>"""
article = """<div style='display:flex; gap: 0.25rem; '><a href='https://github.com/zhudotexe/kani'><img src='https://img.shields.io/badge/Github-Code-blue'></a></div>"""

# Lazily-initialized global Kani instance: created on the first chat() call
# and reset to None by clear_state().
ai = None
11
+
12
def user(message, history):
    """Record a new user turn in the conversation.

    Appends ``[message, ""]`` (user text, empty assistant reply) to the
    history in place, creating a fresh list when none exists yet.

    Returns an empty string (clears the input textbox) and the updated
    history.
    """
    turns = history if history else []
    turns.append([message, ""])
    return "", turns
17
+
18
async def chat(history, limit: int = 1024, temp: float = 0.8, top_k: int = 40, top_p: float = 0.9, repeat_penalty: float = 1.1):
    """Stream the assistant's reply for the most recent user turn.

    Async generator driven by Gradio: yields ``(history, history)`` after
    each piece of output so both the Chatbot widget and the State update
    live.

    NOTE(review): the engine and Kani instance are built only once, when
    the global ``ai`` is None — slider values passed on later messages
    have no effect until clear_state() resets ``ai``.
    """
    history = history or []

    # The latest entry was appended by user(); index 0 holds the user text.
    message = history[-1][0]

    # Clear any previous partial reply before streaming the new one.
    history[-1][1] = ""

    global ai
    if ai is None:
        model_id = "TheBloke/Llama-2-7B-Chat-GGML"
        model_file = "llama-2-7b-chat.ggmlv3.q5_K_M.bin"

        # CPU-only GGML engine; sampling knobs come from the UI sliders.
        engine = LlamaCTransformersEngine(model_id,
                                          model_file,
                                          max_new_tokens = int(limit),
                                          temperature = float(temp),
                                          top_k = int(top_k),
                                          top_p = float(top_p),
                                          repetition_penalty = float(repeat_penalty)
                                          )
        ai = Kani(
            engine,
            system_prompt=(
                "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. "
                " Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content."
                " Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not"
                " make any sense, or is not factually coherent, explain why instead of answering something not correct. If you"
                " don't know the answer to a question, please don't share false information."
            ),
        )

    # NOTE(review): assumes each `output` from full_round_str is an
    # incremental chunk to accumulate with `+=`; if kani yields whole
    # messages per round, consecutive messages get concatenated — confirm
    # against the kani API.
    async for output in ai.full_round_str(
        message
    ):
        answer = output
        history[-1][1] += answer
        # stream the response
        yield history, history
56
+
57
def clear_state(history, chat_message):
    """Reset the demo: drop the conversation and the cached Kani instance.

    Discards the current history, clears the global ``ai`` so the next
    chat() call rebuilds the engine, and restores the textbox placeholder.
    """
    global ai
    ai = None
    return [], gr.update(placeholder='Chat here')
62
+
63
def start():
    """Build and launch the Gradio chat UI.

    Wires three events:
      * submit-button click and textbox submit -> user() (record the turn)
        chained into chat() (stream the model's reply);
      * reset button -> clear_state().
    """
    with gr.Blocks() as demo:
        gr.Markdown(title)
        gr.Markdown(description)
        gr.Markdown(article)

        with gr.Row():
            with gr.Column(scale=0.5):
                # Sampling controls; only read on the first message, when
                # chat() builds the engine (see chat()).
                max_tokens = gr.Slider(1, 1024, label="Max Tokens", step=1, value=512)
                temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.05, value=0.8)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.95)
                top_k = gr.Slider(0, 100, label="Top K", step=1, value=40)
                repeat_penalty = gr.Slider(0.0, 2.0, label="Repetition Penalty", step=0.1, value=1.1)

            with gr.Column():
                chatbot = gr.Chatbot(label='Llama v2')
                message = gr.Textbox(label='User', placeholder='Chat here')
                history = gr.State()

        with gr.Row():
            submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
            clear = gr.Button(value="Reset", variant="secondary").style(full_width=False)

        clear.click(clear_state, inputs=[history, message], outputs=[history, message], queue=False)

        # BUG FIX: inputs must follow chat()'s parameter order
        # (history, limit, temp, top_k, top_p, repeat_penalty). The original
        # passed [..., top_p, top_k, ...], so the Top-P slider fed the
        # top_k parameter and vice versa.
        chat_inputs = [history, max_tokens, temperature, top_k, top_p, repeat_penalty]

        submit_click_event = submit.click(
            fn=user, inputs=[message, history], outputs=[message, history], queue=True
        ).then(
            fn=chat, inputs=chat_inputs, outputs=[chatbot, history], queue=True
        )
        message_submit_event = message.submit(
            fn=user, inputs=[message, history], outputs=[message, history], queue=True
        ).then(
            fn=chat, inputs=chat_inputs, outputs=[chatbot, history], queue=True
        )

    demo.launch(enable_queue=True)

start()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ kani @ git+https://github.com/Maknee/kani@ctransformers
2
+ nest-asyncio==1.5.6
3
+ ctransformers==0.2.14
4
+ gradio