import os

# Pin the Gradio version inside the hosting environment (e.g. a HF Space).
os.system("pip uninstall -y gradio")
os.system("pip install gradio==3.50.2")

from huggingface_hub import InferenceClient
import gradio as gr

"""
Chat engine.

TODOs:
- Better prompts.
- Output reader / parser.
- Agents for evaluation and task planning / splitting.
  * Haystack for orchestration
- Tools for agents
  * Haystack for orchestration
"""

selected_model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
client = InferenceClient(selected_model)


def format_prompt(query, history, lookback):
    """Build a Mixtral-style [INST] prompt from the last `lookback` turns."""
    prompt = "Responses should be no more than 100 words long.\n"
    for previous_query, previous_completion in history[-lookback:]:
        prompt += f"[INST] {previous_query} [/INST] {previous_completion} "
    prompt += f"[INST] {query} [/INST]"
    return prompt


def query_submit(user_message, history):
    """Append the user turn to the chat log and clear the input box."""
    return "", history + [[user_message, None]]


def query_completion(
    history,
    lookback = 3,
    max_new_tokens = 256,
):
    """Stream a completion for the pending (last) turn in the chat log."""
    # The textbox is cleared by query_submit before this runs, so the live
    # query is read from the last history pair; only the completed turns
    # before it are passed as context.
    query = history[-1][0]
    formatted_query = format_prompt(query, history[:-1], lookback)

    generateKwargs = dict(
        max_new_tokens = max_new_tokens,
        seed = 1337,
    )

    stream = client.text_generation(
        formatted_query,
        **generateKwargs,
        stream = True,
        details = True,
        return_full_text = False,
    )

    history[-1][1] = ""
    for response in stream:
        history[-1][1] += response.token.text
        yield history


def retry_query(
    history,
    lookback = 3,
    max_new_tokens = 256,
):
    """Clear the last completion and regenerate it for the same query."""
    if not history:
        return
    history[-1][1] = None
    yield from query_completion(history, lookback, max_new_tokens)


"""
Chat UI using Gradio Blocks.

Blocks preferred for "lower-level" layout control and state management.

TODOs:
- State management for dynamic component updates.
- Add scratchpad readout to the right of the chat log.
  * Placeholder added for now.
- Add functionality to retry button.
  * Placeholder added for now.
- Add dropdown for model selection.
  * Placeholder added for now.
"""

with gr.Blocks() as chatUI:
    # gr.State()

    with gr.Row():
        modelSelect = gr.Dropdown(
            label = "Model selection:",
            scale = 1,  # scale must be an integer
        )

    with gr.Row():
        chatOutput = gr.Chatbot(
            bubble_full_width = False,
            scale = 2,
        )
        agentWhiteBoard = gr.Markdown(scale = 1)

    with gr.Row():
        queryInput = gr.Textbox(
            placeholder = "Please enter your question or request here...",
            show_label = False,
            scale = 4,
        )
        submitButton = gr.Button("Submit", scale = 1)

    with gr.Row():
        fileUpload = gr.File(
            height = 100,
        )
        retryButton = gr.Button("Retry")
        clearButton = gr.ClearButton([queryInput, chatOutput])

    with gr.Row():
        with gr.Accordion(label = "Expand to edit system prompt:"):
            systemPrompt = gr.Textbox(
                value = "System prompt here (null)",
                show_label = False,
                lines = 4,
                scale = 4,  # Not yet wired into format_prompt.
            )

    """
    Event functions
    """
    queryInput.submit(
        fn = query_submit,
        inputs = [queryInput, chatOutput],
        outputs = [queryInput, chatOutput],
        queue = False,
    ).then(
        fn = query_completion,
        inputs = [chatOutput],
        outputs = [chatOutput],
    )

    submitButton.click(
        fn = query_submit,
        inputs = [queryInput, chatOutput],
        outputs = [queryInput, chatOutput],
        queue = False,
    ).then(
        fn = query_completion,
        inputs = [chatOutput],
        outputs = [chatOutput],
    )

    retryButton.click(
        fn = retry_query,
        inputs = [chatOutput],
        outputs = [chatOutput],
    )

chatUI.queue()
chatUI.launch(show_api = False)
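
# --- Sketch: wiring the model-selection dropdown (per the TODO above) ---
# A minimal, untested sketch of how modelSelect could actually switch models
# at run time. The choice list and the set_model helper are illustrative
# assumptions, not part of the current app; if adopted, this wiring belongs
# inside the `with gr.Blocks()` context above, before chatUI.launch() runs.
#
# modelSelect = gr.Dropdown(
#     label = "Model selection:",
#     choices = [selected_model],  # extend with other hosted models
#     value = selected_model,
#     scale = 1,
# )
#
# def set_model(model_name):
#     # Rebind the module-level client. Simple, but note the client is
#     # shared across all concurrent sessions.
#     global client
#     client = InferenceClient(model_name)
#
# modelSelect.change(fn = set_model, inputs = [modelSelect])
#
# --- Sketch: feeding systemPrompt into the prompt (currently unwired) ---
# Another untested sketch: format_prompt hardcodes its system line, so the
# systemPrompt textbox has no effect. One assumed option is an extra
# parameter, passed through query_completion as an additional input:
#
# def format_prompt(query, history, lookback, system_prompt):
#     prompt = f"{system_prompt}\n"
#     ...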