import os
import json
from typing import List, Tuple
from collections import OrderedDict

import gradio as gr
from openai import OpenAI
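# CONFIG is expected to be a JSON object mapping model ids to their settings.
# A hypothetical example of the shape this code reads (the real contents
# depend on the deployment; note that 'api_url' and 'api_key' hold the *names*
# of other environment variables, not the values themselves):
#
# {
#   "stable": {
#     "name": "NeonLLM",
#     "api_url": "STABLE_API_URL",
#     "api_key": "STABLE_API_KEY",
#     "personas": {"default": "You are a helpful assistant.", "raw": null}
#   }
# }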
config = json.loads(os.environ["CONFIG"])
model_names = list(config.keys())

# Union of persona names across all models, preserving first-seen order
# and dropping duplicates.
personas = list(OrderedDict.fromkeys(
    persona for name in config for persona in config[name]["personas"]
))

# Human-readable summary of which personas each model supports.
info = "\n".join(
    f"{model} ({config[model]['name']}): {list(config[model]['personas'].keys())}"
    for model in model_names
)
# One OpenAI-compatible client per configured model; the endpoint and key are
# resolved through the environment variables named in the config.
clients = {}
for name in config:
    client = OpenAI(
        base_url=f"{os.environ[config[name]['api_url']]}/v1",
        api_key=os.environ[config[name]['api_key']],
    )
    clients[name] = client
def respond(
    message,
    history: List[Tuple[str, str]],
    persona,
    model,
    info,
    conversational,
    max_tokens,
):
    messages = []

    # Resolve the system prompt for the requested persona; surface a
    # user-visible Gradio error if the model does not support it.
    try:
        system_prompt = config[model]["personas"][persona]
    except KeyError:
        supported_personas = list(config[model]["personas"].keys())
        raise gr.Error(
            f"Model '{model}' does not support persona '{persona}', only {supported_personas}"
        )
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})

    # In conversational mode, replay only the last two (user, assistant)
    # exchanges to keep the prompt short.
    if conversational:
        for user_msg, assistant_msg in history[-2:]:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    completion = clients[model].chat.completions.create(
        model="neongeckocom/NeonLLM",
        messages=messages,
        max_tokens=max_tokens,
        temperature=0,
        # Server-specific sampling options passed through the OpenAI client.
        extra_body={
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )
    response = completion.choices[0].message.content
    return response
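# Quick sanity check outside the UI (hypothetical values; assumes CONFIG and
# the referenced endpoint/key environment variables are set, and that the
# configured model exposes a "default" persona and a "stable" entry):
#
#     print(respond("Hello!", [], "default", "stable", info, True, 64))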
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Radio(choices=personas, value="default", label="persona"),
        gr.Radio(choices=model_names, value="stable", label="model"),
        gr.Textbox(value=info, interactive=False, label="info"),
        gr.Checkbox(value=True, label="conversational"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    ],
    additional_inputs_accordion=gr.Accordion(label="Config", open=True),
    title="NeonLLM (v2024-06-17)",
    concurrency_limit=5,
)
if __name__ == "__main__":
    demo.launch()