Spaces:
Running
Running
File size: 5,426 Bytes
1be87ac fc98e77 21a478e 1be87ac 21a478e 8037c4b 1be87ac 5d7db46 1be87ac 4ca2388 b597dd2 4132916 4ca2388 1be87ac 4ca2388 1be87ac 4ca2388 1be87ac 4ca2388 80eed0f 1be87ac fc98e77 1be87ac 4ca2388 1be87ac 4ca2388 4facf91 c54909c 202881e 181f5cd 4ca2388 fc98e77 40b508f 4ca2388 40b508f 4ca2388 700ffae b597dd2 700ffae 40b508f 700ffae 4132916 700ffae 40b508f 1be87ac fc98e77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
# Refer to the Hugging Face llama recipes for more info on the Inference API:
# https://github.com/huggingface/huggingface-llama-recipes/blob/main/inference-api.ipynb
# huggingface-llama-recipes repository: https://github.com/huggingface/huggingface-llama-recipes/tree/main
import gradio as gr
from openai import OpenAI
import os
# Access token for the inference endpoint, read from the "myHFtoken"
# environment variable. os.getenv returns None when the variable is unset.
ACCESS_TOKEN = os.getenv("myHFtoken")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    # Do not claim success when nothing was loaded; a missing token is the
    # first thing to check when client creation or API requests fail.
    print("Warning: environment variable 'myHFtoken' is not set; no access token loaded.")

# OpenAI-compatible client pointed at the Hugging Face Inference API endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("Client initialized.")
# Preset system prompts, keyed by the label shown in the "Select System Prompt"
# dropdown. Insertion order matters: the first key is the dropdown's default.
SYSTEM_PROMPTS = {
    "zh-HK": "用香港的廣東話(Cantonese)對話. No chatty. Answer in simple but accurate way.",
    "zh-TW": (
        "Chat by Traditional Chinese language of Taiwan (zh-TW). "
        "No chatty. Answer in simple but accurate way."
    ),
    "EN: General Assistant": (
        "You are a helpful, respectful and honest assistant. "
        "Always provide accurate information and admit when you're not sure about something."
    ),
    "EN: Code Helper": (
        "You are a programming assistant. "
        "Help users with coding questions, debugging, and best practices. "
        "Provide clear explanations and code examples when appropriate."
    ),
    "EN: Creative Writer": (
        "You are a creative writing assistant. "
        "Help users with storytelling, character development, and creative writing techniques. "
        "Be imaginative and encouraging."
    ),
}
def respond(
    message,
    history: list[tuple[str, str]],
    preset_prompt,
    custom_prompt,
    max_tokens,
    temperature,
    top_p,
    model_name,
):
    """Stream a chat completion for ``message`` and yield the growing reply.

    Args:
        message: The new user message to answer.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; empty
            or ``None`` halves of a pair are left out of the request.
        preset_prompt: Key into ``SYSTEM_PROMPTS``; used when no custom prompt
            is given.
        custom_prompt: System prompt that overrides the preset when it contains
            non-whitespace text. ``None`` is treated like a blank prompt.
        max_tokens: Forwarded as ``max_tokens`` to the completion request.
        temperature: Forwarded as ``temperature`` to the completion request.
        top_p: Forwarded as ``top_p`` to the completion request.
        model_name: Forwarded as ``model`` to the completion request.

    Yields:
        The accumulated response text after each streamed chunk that carries
        content.

    Raises:
        KeyError: If the preset is needed and ``preset_prompt`` is not a key of
            ``SYSTEM_PROMPTS``.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")

    # A non-blank custom prompt wins; guard against None so a missing value
    # falls back to the preset instead of raising AttributeError on .strip().
    if custom_prompt and custom_prompt.strip():
        system_message = custom_prompt
    else:
        system_message = SYSTEM_PROMPTS[preset_prompt]

    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Selected model: {model_name}")

    messages = [{"role": "system", "content": system_message}]
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
            print(f"Added user message to context: {user_text}")
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
            print(f"Added assistant message to context: {assistant_text}")
    messages.append({"role": "user", "content": message})

    response = ""
    print("Sending request to OpenAI API.")
    stream = client.chat.completions.create(
        model=model_name,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    )
    # The loop variable is deliberately not called `message`, so the
    # function's own parameter is no longer shadowed.
    for chunk in stream:
        # A streamed chunk may carry no choices at all; there is nothing to
        # append in that case.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # delta.content is None on chunks without text (for example the
        # closing chunk); concatenating None to a str would raise TypeError.
        if token is None:
            continue
        print(f"Received token: {token}")
        response += token
        yield response
    print("Completed response generation.")
# Model identifiers offered in the "Select Model" dropdown.
# The first entry is used as the dropdown's initial value.
models = [
    "ngxson/MiniThinky-v2-1B-Llama-3.2",
    "meta-llama/Llama-3.2-3B-Instruct",
    "PowerInfer/SmallThinker-3B-Preview",
    "NovaSky-AI/Sky-T1-32B-Preview",
    "Qwen/QwQ-32B-Preview",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-3-mini-128k-instruct",
]
with gr.Blocks() as demo:
    gr.Markdown("# LLM Test")

    # Model picker, placed in its own row.
    with gr.Row():
        model_dropdown = gr.Dropdown(
            label="Select Model:",
            choices=models,
            value=models[0],
        )

    # Chat components are created individually (transcript, input box, reset
    # button) so they can be wired to custom callbacks below.
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(
        placeholder="Enter text and press enter",
        show_label=False,
        container=False,
    )
    clear = gr.Button("Clear")

    # Extra generation settings, grouped in an accordion created with open=False.
    with gr.Accordion("Configuration", open=False):
        preset_prompt = gr.Dropdown(
            label="Select System Prompt:",
            choices=list(SYSTEM_PROMPTS),
            value=next(iter(SYSTEM_PROMPTS)),
        )
        custom_prompt = gr.Textbox(
            label="Custom System Prompt (leaves blank to use preset):",
            value="",
            lines=2,
        )
        max_tokens = gr.Slider(
            label="Max new tokens:",
            minimum=1,
            maximum=8192,
            step=1,
            value=2048,
        )
        temperature = gr.Slider(
            label="Temperature:",
            minimum=0.1,
            maximum=1.0,
            step=0.1,
            value=0.3,
        )
        top_p = gr.Slider(
            label="Top-P:",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        )

    def user(user_message, history):
        """Return an empty textbox value and the history plus a pending turn.

        The pending turn is ``[user_message, None]``; its reply slot is filled
        in by ``bot``.
        """
        pending_turn = [user_message, None]
        return "", history + [pending_turn]

    def bot(
        history,
        preset_prompt,
        custom_prompt,
        max_tokens,
        temperature,
        top_p,
        model_name
    ):
        """Fill the last turn's reply from ``respond`` and yield the history.

        ``respond`` yields the accumulated reply text, so each value replaces
        (rather than extends) the reply slot of the last turn.
        """
        latest_turn = history[-1]
        latest_turn[1] = ""
        reply_stream = respond(
            message=latest_turn[0],
            history=history[:-1],
            preset_prompt=preset_prompt,
            custom_prompt=custom_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            model_name=model_name,
        )
        for partial_reply in reply_stream:
            latest_turn[1] = partial_reply
            yield history

    # On submit: first record the user's turn and clear the textbox, then
    # stream the model's reply into the chatbot.
    msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).then(
        fn=bot,
        inputs=[
            chatbot,
            preset_prompt,
            custom_prompt,
            max_tokens,
            temperature,
            top_p,
            model_dropdown,
        ],
        outputs=chatbot,
    )

    # The Clear button sets the chatbot's value to None.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)
# Logged at module level, so it prints on import as well as on direct execution.
print("Gradio interface initialized.")

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()
|