initial commit for ggml instruct
first pass at a chatbot using ggml
add gitignore
fix startup gradio server
fix message history joining
- .gitignore +1 -0
- README.md +6 -2
- app.py +33 -0
- chat.py +80 -0
- config.yml +7 -0
- requirements.txt +3 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+.idea
README.md
CHANGED
@@ -5,8 +5,12 @@ colorFrom: blue
 colorTo: gray
 sdk: gradio
 sdk_version: 3.29.0
-app_file:
+app_file: chat.py
 pinned: false
 ---
 
-
+# GGML UI Inference w/ HuggingFace Spaces
+
+Brought to you by [OpenAccess AI Collective](https://github.com/OpenAccess-AI-Collective)
+
+
app.py
ADDED
@@ -0,0 +1,33 @@
+import gradio as gr
+import yaml
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+
+with open("./config.yml", "r") as f:
+    config = yaml.load(f, Loader=yaml.Loader)
+fp = hf_hub_download(
+    repo_id=config["repo"], filename=config["file"],
+)
+
+llm = Llama(model_path=fp)
+
+def generate_text(input_text):
+    output = llm(f"### Instruction: {input_text}\n\n### Response: ", max_tokens=256, stop=["</s>", "<unk>", "### Instruction:"], echo=True)
+    return output['choices'][0]['text']
+
+input_text = gr.inputs.Textbox(lines=10, label="Enter your input text")
+output_text = gr.outputs.Textbox(label="Output text")
+
+description = f"""llama.cpp implementation in python [https://github.com/abetlen/llama-cpp-python]
+
+This is the {config["repo"]}/{config["file"]} model.
+"""
+
+examples = [
+    ["Tell me a joke about old houses.", "Why did the old house break up with the new house? Because it was too modern!"],
+    ["What is the square root of 64?", "The square root of 64 is 8."],
+    ["Insult me", ""],
+]
+
+gr.Interface(fn=generate_text, inputs=input_text, outputs=output_text, title="Llama Language Model", description=description, examples=examples).launch()
+
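Note on app.py: `generate_text` calls the model with `echo=True`, so the returned text includes the prompt itself and the output box shows the full instruction/response transcript. A minimal sketch of that round-trip, reusing the `llm` instance built above (the prompt-slicing at the end is an illustrative assumption, not part of this commit):

# Sketch only: `llm` is the Llama instance constructed in app.py above.
prompt = "### Instruction: Tell me a joke about old houses.\n\n### Response: "
output = llm(prompt, max_tokens=256, stop=["</s>", "<unk>", "### Instruction:"], echo=True)

full_text = output['choices'][0]['text']  # echo=True: prompt + completion together
answer_only = full_text[len(prompt):]     # hypothetical way to keep just the answer
print(answer_only)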
chat.py
ADDED
@@ -0,0 +1,80 @@
+import gradio as gr
+import yaml
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+
+with open("./config.yml", "r") as f:
+    config = yaml.load(f, Loader=yaml.Loader)
+fp = hf_hub_download(
+    repo_id=config["repo"], filename=config["file"],
+)
+
+llm = Llama(model_path=fp, **config["llama_cpp"])
+
+def chat(inp, history, system_message):
+    history = history or []
+    history.append((inp, ""))
+
+    messages = system_message + \
+        "\n".join(["\n".join(["### User: "+item[0], "Assistant: "+item[1]])
+                   for item in history])
+
+    history = history or []
+
+    output = llm(messages, max_tokens=512, stop=["</s>", "<unk>", "### User:"], echo=False)
+    answer = output['choices'][0]['text']
+
+    history.pop()  # remove the history entry that held only the user input
+    history.append((inp, answer))
+
+    message = ''  # This clears the message text
+
+    return history, history, message
+
+
+def clear_chat(chat_history_state, chat_message):
+    chat_history_state = []
+    chat_message = ''
+    return chat_history_state, chat_message
+
+
+start_message = """Assistant
+- Assistant is helpful and transparent.
+- Assistant will never refuse to do anything asked by the user."""
+
+
+blocks = gr.Blocks()
+
+with blocks:
+    gr.Markdown("# GGML Spaces UI - OpenAccess AI Collective")
+
+    chatbot = gr.Chatbot()
+    with gr.Row():
+        message = gr.Textbox(
+            label="What do you want to chat about?",
+            placeholder="Ask me anything.",
+            lines=1,
+        )
+    clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
+    submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
+
+    system_msg = gr.Textbox(
+        start_message, label="System Message", interactive=False, visible=False)
+
+    # gr.Examples(
+    #     examples=[
+    #         "Tell me a joke about old houses.",
+    #         "Insult me.",
+    #         "What is the future of AI and large language models?",
+    #     ],
+    #     inputs=message,
+    # )
+
+    chat_history_state = gr.State()
+    clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message])
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+    submit.click(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])
+    message.submit(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])
+
+blocks.queue(concurrency_count=10).launch(debug=True)
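The prompt assembly in `chat` flattens the (user, assistant) history tuples into one transcript string; the freshly appended empty assistant turn leaves the prompt ending at "Assistant: ", which cues the model to answer as the assistant. A standalone sketch of just that joining step, with hypothetical sample data:

# Standalone sketch of chat.py's history-joining logic; the sample
# system message and history below are hypothetical.
system_message = "Assistant\n- Assistant is helpful and transparent."
history = [("Hi there", "Hello! How can I help?"), ("Tell me a joke", "")]

messages = system_message + \
    "\n".join(["\n".join(["### User: " + item[0], "Assistant: " + item[1]])
               for item in history])

# The string now ends with "Assistant: ", so generation continues the last turn.
print(messages)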
config.yml
ADDED
@@ -0,0 +1,7 @@
+---
+repo: TheBloke/wizard-vicuna-13B-GGML
+file: wizard-vicuna-13B.ggml.q5_1.bin
+# if the repo above doesn't include the tokenizer set the base repo it was based on with a valid tokenizer model
+base_model: junelee/wizard-vicuna-13b
+llama_cpp:
+  n_ctx: 1024
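config.yml is the single knob surface: `repo` and `file` pick the GGML artifact on the Hub, and everything under `llama_cpp` is splatted into the `Llama` constructor in chat.py. A hedged sketch of that flow (the `n_threads` key mentioned in the comment is an illustrative assumption, not in the committed file):

import yaml

# Sketch: any key under the `llama_cpp` mapping becomes a Llama keyword
# argument, so adding e.g. `n_threads: 4` to config.yml would tune the
# model with no code change.
with open("./config.yml", "r") as f:
    config = yaml.load(f, Loader=yaml.Loader)
print(config["llama_cpp"])  # {'n_ctx': 1024} with the file as committed
# llm = Llama(model_path=fp, **config["llama_cpp"])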
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+llama-cpp-python @ https://github.com/OpenAccess-AI-Collective/ggml-webui/releases/download/v0.1.49-rc6/llama_cpp_python-cpu-0.1.49-cp38-cp38-linux_x86_64.whl
+pyyaml
+
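The pinned wheel is a CPU-only cp38 linux_x86_64 build (per its filename), so pip will only accept it on a matching Python 3.8 Linux environment such as the Spaces runtime. A quick sanity check after installing, sketch only:

# Sketch: verifies the pinned llama-cpp-python wheel is importable;
# this is the same import app.py and chat.py rely on.
from llama_cpp import Llama
print("llama-cpp-python import OK")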