# NeonLLM / app.py
import os
import json
from typing import List, Tuple
from collections import OrderedDict
import gradio as gr
from openai import OpenAI
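
# CONFIG holds a JSON object mapping model names to their settings. An
# illustrative (hypothetical) shape, inferred from how it is used below:
#
# {
#   "stable": {
#     "name": "display name",
#     "api_url": "ENV_VAR_WITH_BASE_URL",   # name of an env var, not the URL itself
#     "api_key": "ENV_VAR_WITH_API_KEY",    # name of an env var, not the key itself
#     "personas": {"default": "system prompt text, or null for none"}
#   }
# }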
config = json.loads(os.environ['CONFIG'])
model_names = list(config.keys())
personas = list(OrderedDict.fromkeys(persona for name in config for persona in config[name]["personas"]))
info = "\n".join([f"{model} ({config[model]['name']}): {list(config[model]['personas'].keys())}" for model in model_names])
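
# One OpenAI-compatible client per configured model; each config entry names
# the environment variables that hold its base URL and API key.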
clients = {}
for name in config:
    client = OpenAI(
        base_url=f"{os.environ[config[name]['api_url']]}/v1",
        api_key=os.environ[config[name]['api_key']],
    )
    clients[name] = client


def respond(
    message,
    history: List[Tuple[str, str]],
    persona,
    model,
    info,
    conversational,
    max_tokens,
):
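    """Build and send a chat completion request for the selected model/persona.

    When `conversational` is set, only the last two history turns are
    forwarded, keeping the prompt short at the cost of older context.
    """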
    messages = []

    try:
        system_prompt = config[model]["personas"][persona]
    except KeyError:
        supported_personas = list(config[model]["personas"].keys())
        raise gr.Error(f"Model '{model}' does not support persona '{persona}', only {supported_personas}")

    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})

    if conversational:
        for val in history[-2:]:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})
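
    # Deterministic decoding: temperature 0, with repetition penalty and beam
    # search passed through extra_body (vLLM-style sampling extensions,
    # assuming an OpenAI-compatible vLLM server behind the base URL).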
    completion = clients[model].chat.completions.create(
        model="neongeckocom/NeonLLM",
        messages=messages,
        max_tokens=max_tokens,
        temperature=0,
        extra_body={
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )
    response = completion.choices[0].message.content
    return response


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Radio(choices=personas, value="default", label="persona"),
        gr.Radio(choices=model_names, value="stable", label="model"),
        gr.Textbox(value=info, interactive=False, label="info"),
        gr.Checkbox(value=True, label="conversational"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    ],
    additional_inputs_accordion=gr.Accordion(label="Config", open=True),
    title="NeonLLM (v2024-06-17)",
    concurrency_limit=5,
)

if __name__ == "__main__":
    demo.launch()
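
# Example local launch (hypothetical values; CONFIG entries must name real
# env vars that hold the actual base URL and API key):
#   export STABLE_API_URL="http://localhost:8000"
#   export STABLE_API_KEY="sk-..."
#   export CONFIG='{"stable": {"name": "NeonLLM", "api_url": "STABLE_API_URL",
#     "api_key": "STABLE_API_KEY", "personas": {"default": null}}}'
#   python app.py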