# Import required libraries
import threading
import time

import gradio as gr
import tensorflow as tf
import torch
from huggingface_hub import InferenceClient
from transformers import pipeline

# NOTE(review): `huggingfacehub` differs from the `huggingface_hub` package
# imported above; confirm this module really exists, otherwise this import
# fails at startup. Kept because the original file imports it.
import huggingfacehub as infer

# Initialize the text generation pipeline with the specified model.
# Use the first CUDA device when one is available; otherwise fall back to CPU
# (-1) instead of failing on a hard-coded GPU index.
pipe = pipeline(
    "text-generation",
    model="chargoddard/Yi-34B-Llama",
    device=0 if torch.cuda.is_available() else -1,
)


def respond(message, history=None, max_tokens=512, temperature=0.7, top_p=0.95):
    """Generate a chat reply for ``message`` with the module-level pipeline.

    Args:
        message: The latest user message; it is passed to the pipeline as the
            prompt.
        history: Unused. Accepted because ``gr.ChatInterface`` passes the
            conversation history as the second positional argument.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature forwarded to the pipeline.
        top_p: Nucleus-sampling probability mass forwarded to the pipeline.

    Yields:
        The ``generated_text`` of the first returned sequence, exactly once.
    """
    # Generate the response using the pipeline.
    result = pipe(
        message,
        # `max_new_tokens` limits only the generated part; `max_length` would
        # also count the prompt tokens and can leave no room for a reply.
        max_new_tokens=int(max_tokens),
        num_return_sequences=1,
        # Sampling must be enabled for temperature / top_p to take effect.
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    yield result[0]["generated_text"]


# Gradio interface setup.
# No additional inputs are wired, so respond() runs with its default
# generation settings (max_tokens, temperature, top_p).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[],
)

if __name__ == "__main__":
    demo.launch()