Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -2,9 +2,12 @@ from flask import Flask, request, Response
|
|
2 |
import logging
|
3 |
from llama_cpp import Llama
|
4 |
import threading
|
5 |
-
from huggingface_hub import snapshot_download
|
|
|
6 |
import gc
|
7 |
import os.path
|
|
|
|
|
8 |
|
9 |
SYSTEM_PROMPT = "Ты — русскоязычный автоматический ассистент. Ты максимально точно и отвечаешь на запросы пользователя, используя русский язык."
|
10 |
SYSTEM_TOKEN = 1788
|
@@ -51,6 +54,29 @@ model = None
|
|
51 |
model_path = snapshot_download(repo_id=repo_name, allow_patterns=model_name) + '/' + model_name
|
52 |
app.logger.info('Model path: ' + model_path)
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
def init_model(context_size, enable_gpu=False, gpu_layer_number=35):
|
55 |
global model
|
56 |
|
@@ -221,18 +247,8 @@ def generate_response():
|
|
221 |
top_k = parameters.get("top_k", 30)
|
222 |
return_full_text = parameters.get("return_full_text", False)
|
223 |
|
224 |
-
|
225 |
-
# Generate the response
|
226 |
-
#system_tokens = get_system_tokens(model)
|
227 |
-
#tokens = system_tokens
|
228 |
-
|
229 |
-
#if preprompt != "":
|
230 |
-
# tokens = get_system_tokens_for_preprompt(model, preprompt)
|
231 |
-
#else:
|
232 |
tokens = get_system_tokens(model)
|
233 |
-
tokens.append(LINEBREAK_TOKEN)
|
234 |
-
#model.eval(tokens)
|
235 |
-
|
236 |
|
237 |
tokens = []
|
238 |
|
@@ -243,22 +259,13 @@ def generate_response():
|
|
243 |
message_tokens = get_message_tokens(model=model, role="user", content=message.get("content", ""))
|
244 |
|
245 |
tokens.extend(message_tokens)
|
246 |
-
|
247 |
-
#app.logger.info('model.eval start')
|
248 |
-
#model.eval(tokens)
|
249 |
-
#app.logger.info('model.eval end')
|
250 |
-
|
251 |
-
#last_message = messages[-1]
|
252 |
-
#if last_message.get("from") == "assistant":
|
253 |
-
# last_message_tokens = get_message_tokens(model=model, role="bot", content=last_message.get("content", ""))
|
254 |
-
#else:
|
255 |
-
# last_message_tokens = get_message_tokens(model=model, role="user", content=last_message.get("content", ""))
|
256 |
|
257 |
tokens.extend([model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN])
|
258 |
|
259 |
|
260 |
app.logger.info('Prompt:')
|
261 |
-
|
|
|
262 |
|
263 |
stop_generation = False
|
264 |
app.logger.info('Generate started')
|
@@ -271,8 +278,20 @@ def generate_response():
|
|
271 |
)
|
272 |
app.logger.info('Generator created')
|
273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
# Use Response to stream tokens
|
275 |
-
return Response(
|
|
|
|
|
276 |
|
277 |
if __name__ == "__main__":
|
278 |
app.run(host="0.0.0.0", port=7860, debug=False, threaded=False)
|
|
|
2 |
import logging
|
3 |
from llama_cpp import Llama
|
4 |
import threading
|
5 |
+
from huggingface_hub import snapshot_download, Repository
|
6 |
+
import huggingface_hub
|
7 |
import gc
|
8 |
import os.path
|
9 |
+
import csv
|
10 |
+
from datetime import datetime
|
11 |
|
12 |
SYSTEM_PROMPT = "Ты — русскоязычный автоматический ассистент. Ты максимально точно и отвечаешь на запросы пользователя, используя русский язык."
|
13 |
SYSTEM_TOKEN = 1788
|
|
|
54 |
model_path = snapshot_download(repo_id=repo_name, allow_patterns=model_name) + '/' + model_name
|
55 |
app.logger.info('Model path: ' + model_path)
|
56 |
|
57 |
+
DATASET_REPO_URL = "https://huggingface.co/datasets/muryshev/saiga-chat"
|
58 |
+
DATA_FILENAME = "data.csv"
|
59 |
+
DATA_FILE = os.path.join("data", DATA_FILENAME)
|
60 |
+
|
61 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
62 |
+
app.logger.info("HF_TOKEN is None?", HF_TOKEN is None)
|
63 |
+
app.logger.info("hfh", huggingface_hub.__version__)
|
64 |
+
|
65 |
+
repo = Repository(
|
66 |
+
local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
|
67 |
+
)
|
68 |
+
|
69 |
+
def log(request: str = '', response: str = ''):
|
70 |
+
if request or response:
|
71 |
+
with open(DATA_FILE, "a") as csvfile:
|
72 |
+
writer = csv.DictWriter(csvfile, fieldnames=["request", "response", "time"])
|
73 |
+
writer.writerow(
|
74 |
+
{"request": request, "response": response, "time": str(datetime.now())}
|
75 |
+
)
|
76 |
+
commit_url = repo.push_to_hub()
|
77 |
+
app.logger.info(commit_url)
|
78 |
+
|
79 |
+
|
80 |
def init_model(context_size, enable_gpu=False, gpu_layer_number=35):
|
81 |
global model
|
82 |
|
|
|
247 |
top_k = parameters.get("top_k", 30)
|
248 |
return_full_text = parameters.get("return_full_text", False)
|
249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
tokens = get_system_tokens(model)
|
251 |
+
tokens.append(LINEBREAK_TOKEN)
|
|
|
|
|
252 |
|
253 |
tokens = []
|
254 |
|
|
|
259 |
message_tokens = get_message_tokens(model=model, role="user", content=message.get("content", ""))
|
260 |
|
261 |
tokens.extend(message_tokens)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
|
263 |
tokens.extend([model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN])
|
264 |
|
265 |
|
266 |
app.logger.info('Prompt:')
|
267 |
+
request = model.detokenize(tokens[:CONTEXT_SIZE]).decode("utf-8", errors="ignore")
|
268 |
+
app.logger.info(request)
|
269 |
|
270 |
stop_generation = False
|
271 |
app.logger.info('Generate started')
|
|
|
278 |
)
|
279 |
app.logger.info('Generator created')
|
280 |
|
281 |
+
|
282 |
+
response_tokens = []
|
283 |
+
def generate_and_log_tokens(model, generator):
|
284 |
+
for token in generate_tokens(model, generator):
|
285 |
+
if token == model.token_eos(): # or (max_new_tokens is not None and i >= max_new_tokens):
|
286 |
+
log(request=request, response=model.detokenize(response_tokens).decode("utf-8", errors="ignore"))
|
287 |
+
break
|
288 |
+
response_tokens.append(token)
|
289 |
+
yield token
|
290 |
+
|
291 |
# Use Response to stream tokens
|
292 |
+
return Response(generate_and_log_tokens(model, generator), content_type='text/plain', status=200, direct_passthrough=True)
|
293 |
+
|
294 |
+
|
295 |
|
296 |
if __name__ == "__main__":
|
297 |
app.run(host="0.0.0.0", port=7860, debug=False, threaded=False)
|