# lama-test / app.py
# NOTE(review): the lines that preceded this header ("spuun's picture",
# "Update app.py", commit b32b929, "raw / history blame contribute delete",
# "988 Bytes") were Hugging Face Space page chrome captured by a web scrape,
# not source code; they are preserved here as a comment so the file parses.
from llama_cpp import Llama
import gradio as gr
import random
import requests
import os
# Fetch the GGML model weights on first run; later runs reuse the local copy.
# NOTE(review): the local filename says q4_0 but the URL serves a q5_1 GGML
# file — the names disagree; confirm which quantization is actually intended.
if not os.path.exists("ggml-model-q4_0.bin"):
    resp = requests.get(
        "https://huggingface.co/birdup/pygmalion-7b-q5_1-ggml-v5/resolve/main/pygmalion-7b-q5_1-ggml-v5.bin",
        stream=True,  # multi-GB file: stream to disk, do not buffer in RAM
    )
    # Fail loudly on HTTP errors instead of silently saving an error page
    # to disk as the "model".
    resp.raise_for_status()
    with open("ggml-model-q4_0.bin", "wb") as f:
        for chunk in resp.iter_content(chunk_size=1 << 20):
            f.write(chunk)
else:
    print("Model already exists, skipping redownload")
# Load the llama.cpp model from the local weights file. The random seed
# means sampling differs between app restarts.
print("Loading model...")
_model_config = {
    "model_path": "ggml-model-q4_0.bin",
    "seed": random.randint(1, 9999999),  # fresh seed each process start
    "n_ctx": 2048,   # context window in tokens
    "n_threads": 3,  # CPU threads for inference
}
llm = Llama(**_model_config)
print("Model loaded.")
def generate(prompt, stop):
    """Run one completion through the model and return the generated text.

    Args:
        prompt: Raw prompt text from the UI textbox. Backslash escape
            sequences typed by the user (e.g. "\\n") are interpreted via
            unicode_escape before inference.
        stop: Optional stop sequence, same escape handling. An empty
            string disables the stop criterion.

    Returns:
        The text of the first completion choice.
    """
    # unicode_escape lets users type "\n" literally in the textbox.
    # NOTE(review): this utf-8 -> unicode_escape round trip mangles
    # non-ASCII input; presumably acceptable for this demo — confirm.
    decoded_prompt = bytes(prompt, "utf-8").decode("unicode_escape")
    decoded_stop = bytes(stop, "utf-8").decode("unicode_escape")
    output = llm(
        decoded_prompt,
        max_tokens=64,
        temperature=0.75,
        top_p=0.7,
        # BUG FIX: was `len(stop) > 1`, which silently ignored
        # single-character stop sequences such as ".".
        stop=[decoded_stop] if stop else None,
    )
    print(output)  # log the full response dict for debugging
    return output["choices"][0]["text"]
# Wire `generate` into a simple UI: two text inputs (prompt, stop sequence)
# mapped positionally onto generate(prompt, stop), one text output.
app = gr.Interface(fn=generate, inputs=["text", "text"], outputs="text")
# Start the web server; this call blocks for the lifetime of the app.
app.launch()