AFischer1985 committed
Commit · 1a0c402 · 1 Parent(s): 120246e

Initial commit

Browse files:
- README.md +6 -6
- requirements.txt +3 -0
- run.py +87 -0
README.md CHANGED
@@ -1,12 +1,12 @@
+
 ---
-title: SauerkrautLM
+title: SauerkrautLM-Interface
 emoji: 🔥
-colorFrom:
+colorFrom: indigo
 colorTo: indigo
 sdk: gradio
-sdk_version:
-app_file:
+sdk_version: 3.47.1
+app_file: run.py
 pinned: false
+hf_oauth: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
requirements.txt ADDED
@@ -0,0 +1,3 @@
+llama-cpp-python[server]
+chromadb
+sentence_transformers
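A note on these dependencies: the [server] extra of llama-cpp-python pulls in what is needed for `python3 -m llama_cpp.server`, the OpenAI-compatible HTTP server that run.py below launches on port 2600; chromadb and sentence_transformers are installed but not imported by run.py, presumably groundwork for later retrieval features. As a minimal sketch of how that endpoint is queried once the server is up, a one-shot (non-streaming) request, using the ChatML prompt format from run.py:

import requests

# Sketch: one-shot completion against the OpenAI-compatible endpoint of
# llama_cpp.server. Assumes the server started by run.py (below) is
# already listening on localhost:2600.
body = {
    "prompt": "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
    "max_tokens": 100,
}
r = requests.post("http://localhost:2600/v1/completions", json=body)
print(r.json()["choices"][0]["text"])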
run.py ADDED
@@ -0,0 +1,87 @@
+#########################################################################################
+# Title: Gradio Interface to SauerkrautLM-Chatbot on premises
+# Author: Andreas Fischer
+# Date: December 29th, 2023
+# Last update: December 29th, 2023
+##########################################################################################
+
+
+# Get model
+#-----------
+
+import os
+import requests
+
+modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
+if(os.path.exists(modelPath)==False):
+    #url="https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
+    #url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
+    #url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
+    #url="https://huggingface.co/TheBloke/SauerkrautLM-UNA-SOLAR-Instruct-GGUF/resolve/main/sauerkrautlm-una-solar-instruct.Q4_0.gguf?download=true"
+    #url="https://huggingface.co/TheBloke/SauerkrautLM-7B-v1-GGUF/resolve/main/sauerkrautlm-7b-v1.Q4_0.gguf?download=true"
+    #url="https://huggingface.co/TheBloke/SauerkrautLM-3B-v1-GGUF/resolve/main/sauerkrautlm-3b-v1.Q4_0.gguf?download=true"
+    url="https://huggingface.co/TheBloke/SauerkrautLM-7B-HerO-GGUF/resolve/main/sauerkrautlm-7b-hero.Q4_0.gguf?download=true"
+    response = requests.get(url)
+    with open("./model.gguf", mode="wb") as file:
+        file.write(response.content)
+    print("Model downloaded")
+    modelPath="./model.gguf"
+
+print(modelPath)
+
+
+# Llama-cpp-Server
+#------------------
+
+import subprocess
+command = ["python3", "-m", "llama_cpp.server", "--model", modelPath, "--host", "0.0.0.0", "--port", "2600", "--n_threads", "4"]
+subprocess.Popen(command)
+print("Server ready!")
+
+
+# Gradio-GUI
+#------------
+
+import gradio as gr
+import requests
+import json
+def response(message, history):
+    addon=""
+    url="http://localhost:2600/v1/completions"
+    system="Du bist ein KI-basiertes Assistenzsystem."+addon+"\n\nUser-Anliegen:"  # "You are an AI-based assistance system." / "User request:"
+    #body={"prompt":f"{system} USER: {message} ASSISTANT:","max_tokens":500, "echo":"False","stream":"True"} #e.g., WizardLM-13B-v1.2
+    #body={"prompt":f"[INST]{system}\n{message}[/INST]","max_tokens":500, "echo":"False","stream":"True"} #e.g. Mixtral-Instruct
+    #body={"prompt":f"{system} ### Instruktion:\n{message}\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
+    body={"prompt":f"<|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n",
+          "max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM-7B-HerO
+    response=""
+    buffer=""
+    print("URL: "+url)
+    print(str(body))
+    print("User: "+message+"\nAI: ")
+    for text in requests.post(url, json=body, stream=True): #-H 'accept: application/json' -H 'Content-Type: application/json'
+        if buffer is None: buffer=""
+        buffer=str("".join(buffer))
+        #print("*** Raw String: "+str(text)+"\n***\n")
+        text=text.decode('utf-8')
+        if((text.startswith(": ping -")==False) & (len(text.strip("\n\r"))>0)): buffer=buffer+str(text)
+        #print("\n*** Buffer: "+str(buffer)+"\n***\n")
+        buffer=buffer.split('"finish_reason": null}]}')
+        if(len(buffer)==1):
+            buffer="".join(buffer)
+            pass
+        if(len(buffer)==2):
+            part=buffer[0]+'"finish_reason": null}]}'
+            if(part.lstrip('\n\r').startswith("data: ")): part=part.lstrip('\n\r').replace("data: ", "")
+            try:
+                part = str(json.loads(part)["choices"][0]["text"])
+                print(part, end="", flush=True)
+                response=response+part
+                buffer="" # reset buffer
+            except Exception as e:
+                print("Exception:"+str(e))
+                pass
+        yield response
+
+gr.ChatInterface(response, chatbot=gr.Chatbot(render_markdown=True),title="SauerkrautLM-7B-HerO-GGUF Chat").queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
+print("Interface up and running!")
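The streaming loop in response() reassembles the stream by buffering raw bytes and splitting on the literal '"finish_reason": null}]}'. Since the server emits server-sent events, one 'data: {...}' line per chunk, a line-oriented reader is a more robust alternative; a sketch under that assumption, reusing the endpoint and request body from response() (the function name is hypothetical):

import json
import requests

# Sketch: line-oriented reader for the same /v1/completions stream.
# Each event arrives as a "data: {...}" line; some OpenAI-compatible
# servers also send a final "data: [DONE]" sentinel.
def stream_completion(body, url="http://localhost:2600/v1/completions"):
    with requests.post(url, json=body, stream=True) as r:
        for line in r.iter_lines():
            if not line:
                continue  # skip keep-alive blanks between events
            line = line.decode("utf-8")
            if not line.startswith("data: "):
                continue  # skip ": ping" comment lines
            payload = line[len("data: "):].strip()
            if payload == "[DONE]":
                break  # explicit end-of-stream sentinel
            chunk = json.loads(payload)["choices"][0]
            yield chunk["text"]
            if chunk["finish_reason"] is not None:
                break  # final chunk carries "stop" / "length"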