import gradio as gr
import torch  # backend for the transformers pipeline; device=0 below needs a CUDA build
from transformers import pipeline

# Load the model once at startup. Yi-34B-Llama is a 34B-parameter model, so this
# needs a GPU with substantial memory; device=0 selects the first CUDA device.
pipe = pipeline("text-generation", model="chargoddard/Yi-34B-Llama", device=0)
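
# If the model does not fit in memory at full precision, a common variant
# (an assumption, not part of the original app) is half precision with
# automatic placement across available devices instead of a fixed device=0:
#
#   pipe = pipeline(
#       "text-generation",
#       model="chargoddard/Yi-34B-Llama",
#       torch_dtype=torch.float16,  # halves memory use
#       device_map="auto",          # shard across available GPUs, offload the rest
#   )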


def respond(message, history, max_tokens, temperature, top_p):
    """Generate a reply for gr.ChatInterface.

    `message` is the latest user turn; `history` holds earlier turns, which this
    simple handler does not feed back to the model. The remaining arguments come
    from additional_inputs below.
    """
    result = pipe(
        message,
        max_new_tokens=max_tokens,  # cap generated tokens (max_length would count the prompt too)
        num_return_sequences=1,
        do_sample=True,             # sampling must be enabled for temperature/top_p to take effect
        temperature=temperature,
        top_p=top_p,
        return_full_text=False,     # return only the completion, not the echoed prompt
    )
    response = result[0]["generated_text"]
    yield response  # ChatInterface accepts generator handlers
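
# Note: the handler above ignores `history`, so the model only ever sees the
# latest turn. A minimal sketch of folding prior turns into the prompt, assuming
# the default tuple-style history and a plain "User:/Assistant:" format (no chat
# template is wired up for this model here, so the exact format is an assumption):
#
#   def build_prompt(message, history):
#       turns = [f"User: {u}\nAssistant: {a}" for u, a in history]
#       turns.append(f"User: {message}\nAssistant:")
#       return "\n".join(turns)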


# Chat UI. Values from additional_inputs are passed to respond() after
# (message, history), in the order listed here.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)


if __name__ == "__main__":
    demo.launch()
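
# To run locally (assumes a GPU large enough for the 34B model, and that this
# file is saved as app.py):
#   pip install gradio transformers torch
#   python app.py
# demo.launch(share=True) would additionally expose a temporary public link.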