Spaces:
Runtime error
Runtime error
import os | |
from huggingface_hub import InferenceClient | |
import gradio as gr | |
from gradio_client import Client | |
# Model used for chat completion.
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
inference_client = InferenceClient(
    model=model_id,
    # Indexing (rather than .get) makes a missing HF_TOKEN fail at startup with KeyError.
    token=os.environ["HF_TOKEN"],
)

# Space that is queried for documentation excerpts.
docs_embeddings_space_id = "huggingchat/hf-docs"
gradio_client = Client(src=docs_embeddings_space_id)
# System message sent as the first message of every conversation.
# From https://huggingface.co/chat/settings/assistants/65f33e95d854946bb3f88dde
# NOTE: the text (including the "realted" typo) is reproduced verbatim.
SYSTEM_PROMPT = (
    "You are a Hugging Face AI expert. "
    "Use the provided context to answer user questions. "
    "If the request is not realted to Hugging Face Hub or Hugging Face open source libraries, "
    'you MUST respond with: "I can only chat about Hugging Face" and STOP answering.'
)
def generate(prompt, history):
    """Stream an answer to ``prompt`` grounded in retrieved documentation.

    First asks ``gradio_client`` for documentation excerpts matching the
    prompt, then streams a chat completion from ``inference_client`` using the
    system prompt, the previous turns and the excerpts.

    Args:
        prompt: The user's latest message.
        history: Previous turns as ``(user_msg, assistant_msg)`` pairs.

    Yields:
        The answer accumulated so far, followed by a ``sources:`` footer.

    Raises:
        gr.Error: If retrieval or generation fails; the original exception is
            chained as the cause.
    """
    try:
        # Step 1: get relevant docs excerpts.
        rag_content, sourced_md = gradio_client.predict(
            query_text=prompt,
            output_option="RAG-friendly",
            api_name="/predict",
        )

        # Step 2: generate the answer.
        # The question and the instructions are separated by a newline; the
        # previous line-continuation glued them together with no separator.
        processed_prompt = (
            f'Answer the question: "{prompt}"\n'
            "Here are relevant extract from docs that you can use to generate the answer:\n"
            "=====================\n"
            f"{rag_content}\n"
            "====================="
        )

        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        for user_msg, assistant_msg in history:
            if assistant_msg is None:
                # Incomplete turn (no assistant reply recorded): skip it so
                # user/assistant roles keep alternating.
                continue
            # Drop the sources footer appended to earlier answers.
            answer_only = assistant_msg.split("\n\nsources:")[0]
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": answer_only})
        messages.append({"role": "user", "content": processed_prompt})

        generate_kwargs = dict(
            temperature=0.6,
            max_tokens=8192,
            top_p=0.95,
        )

        sources_footer = f"\n\nsources: {sourced_md}"
        output = ""
        stream = inference_client.chat_completion(messages, stream=True, **generate_kwargs)
        for chunk in stream:
            # A chunk may carry no text (no choices, or a delta whose content
            # is None); concatenating None would raise TypeError.
            if not chunk.choices:
                continue
            new_content = chunk.choices[0].delta.content
            if not new_content:
                continue
            output += new_content
            yield output + sources_footer
        # Kept for callers that read the generator's return value.
        return output + sources_footer
    except Exception as e:
        # gr.Error expects a message string; chain the cause for the logs.
        raise gr.Error(str(e)) from e
# Example questions passed to the chat interface.
examples = [
    "How do upload a model?",
    "Can I change the color of my Space?",
    "How do I finetune Stable Diffusion with Lora?",
    "How do I run a model found on the Hugging Face Hub?",
]
# Chat widget: panel layout, no label or share button, copy and like enabled.
chat_window = gr.Chatbot(
    show_label=False,
    show_share_button=False,
    show_copy_button=True,
    likeable=True,
    layout="panel",
)

# Chat UI around `generate`; stop/retry/undo/clear buttons are set to None
# and example caching is turned off.
demo = gr.ChatInterface(
    fn=generate,
    chatbot=chat_window,
    title="HF Docs Bot 🤗",
    examples=examples,
    concurrency_limit=400,
    stop_btn=None,
    retry_btn=None,
    undo_btn=None,
    clear_btn=None,
    cache_examples=False,
)

demo.launch(show_api=False)