from llama_cpp import Llama
import gradio as gr
import random
import requests
import os

# Fetch the quantized Pygmalion 7B weights on first run. The remote file is a
# q5_1 GGML quantization, so the local copy is named to match it.
MODEL_PATH = "pygmalion-7b-q5_1-ggml-v5.bin"
MODEL_URL = "https://huggingface.co/birdup/pygmalion-7b-q5_1-ggml-v5/resolve/main/pygmalion-7b-q5_1-ggml-v5.bin"

if not os.path.exists(MODEL_PATH):
    print("Downloading model...")
    # Stream to disk in chunks so the multi-gigabyte file is never held in memory.
    with requests.get(MODEL_URL, stream=True) as response:
        response.raise_for_status()
        with open(MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)
else:
    print("Model already exists, skipping redownload")

print("Loading model...")
llm = Llama(
    model_path=MODEL_PATH,
    seed=random.randint(1, 9999999),  # fresh seed each run so sampling varies
    n_ctx=2048,    # context window size, in tokens
    n_threads=3,   # CPU threads used for inference
)
print("Model loaded.")


def generate(prompt, stop):
    # Gradio textboxes deliver escape sequences as literal characters (a typed
    # "\n" arrives as backslash plus "n"), so decode them into real control
    # characters before prompting the model.
    output = llm(
        bytes(prompt, "utf-8").decode("unicode_escape"),
        max_tokens=64,
        temperature=0.75,
        top_p=0.7,
        # Only pass a stop sequence when one was actually supplied.
        stop=[bytes(stop, "utf-8").decode("unicode_escape")] if stop else None,
    )
    print(output)
    return output["choices"][0]["text"]


app = gr.Interface(fn=generate, inputs=["text", "text"], outputs="text")
app.launch()
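
# launch() also accepts the usual Gradio options; for example, a temporary
# public link:
#
#     app.launch(share=True)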