File size: 4,146 Bytes
d29ae9a d92dc0a 4d8faf1 d29ae9a d92dc0a 856636a d29ae9a 61849bb 856636a d92dc0a d29ae9a d92dc0a 856636a d92dc0a d6b4766 d92dc0a 91208a3 5d9b451 01bf48f dc34b69 d92dc0a 856636a d92dc0a 856636a d29ae9a 856636a d29ae9a 8ace6ee 856636a d29ae9a d92dc0a d29ae9a 856636a d29ae9a 856636a d29ae9a 856636a d92dc0a 856636a e5bddd1 856636a d92dc0a 856636a d92dc0a d29ae9a 856636a d92dc0a d29ae9a d92dc0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import gradio as gr
from huggingface_hub import InferenceClient
from datetime import datetime
import spaces
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# Identifier of the adapter passed to PeftModel.from_pretrained; the tokenizer
# is loaded from the same identifier and it is shown in the UI title.
lora_name = "robinhad/UAlpaca-2.0-Mistral-7B"
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from torch import bfloat16
# Base checkpoint that the adapter is applied on top of.
model_name = "mistralai/Mistral-7B-v0.1"

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# The tokenizer comes from the adapter identifier, not from the base model.
tokenizer = AutoTokenizer.from_pretrained(lora_name, use_fast=True)

# Load the quantized base model, wrap it with the adapter (adapter weights
# requested on CPU), then move the combined model to the GPU.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quant_config,
    ),
    lora_name,
    torch_device="cpu",
).to("cuda")
from transformers import StoppingCriteriaList, StopStringCriteria, TextIteratorStreamer
from threading import Thread
# Stopping criteria handed to model.generate: a single stop string, "<|im_end|>".
stop_criteria = StoppingCriteriaList(
    [
        StopStringCriteria(tokenizer, stop_strings=["<|im_end|>"]),
    ]
)
# will be used with normal template
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
):
    """Stream the model's reply to ``message`` given the earlier chat turns.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs. An
            empty/falsy element of a pair is skipped.
        max_tokens: Passed to ``model.generate`` as ``max_new_tokens``.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Yields:
        The reply text accumulated so far, with every ``<|im_end|>`` marker
        removed.
    """
    # Rebuild the conversation in the role/content format expected by the
    # tokenizer's chat template.
    messages = []
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    tokenized = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
    ).to("cuda")

    # Log the fully rendered prompt for debugging.
    print(tokenizer.batch_decode(tokenized)[0])
    print("====")

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    generation_kwargs = dict(
        inputs=tokenized,
        streamer=streamer,
        max_new_tokens=max_tokens,
        stopping_criteria=stop_criteria,
        # Sampling must be enabled explicitly; otherwise generate() decodes
        # greedily and the temperature / top_p values below are ignored.
        do_sample=True,
        top_p=top_p,
        temperature=temperature,
    )

    # generate() blocks until completion, so it runs in a worker thread while
    # this generator consumes the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        # Strip on the accumulated text so the end marker never reaches the UI.
        generated_text = generated_text.replace("<|im_end|>", "")
        yield generated_text

    # The streamer is exhausted, so generation has finished; reap the worker.
    thread.join()
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI around `respond`. The three sliders line up, in order, with the
# max_tokens, temperature and top_p parameters of `respond`.
demo = gr.ChatInterface(
    respond,
    title=f"Inference demo for '{lora_name}' (alpha) model, instruction-tuned for Ukrainian",
    description="### Attribution: ELEKS supported this project through a grant dedicated to the memory of Oleksiy Skrypnyk",
    additional_inputs=[
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
        ),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, value=0.95, step=0.05),
    ],
    # Sample prompts offered in the UI (in Ukrainian).
    examples=[
        ["Напиши історію про Івасика-Телесика"],
        ["Яка найвища гора в Україні?"],
        ["Як звали батька Тараса Григоровича Шевченка?"],
        ["Яка з цих гір не знаходиться у Європі? Говерла, Монблан, Гран-Парадізо, Еверест"],
        ["Дай відповідь на питання\nЧому у качки жовті ноги?"],
    ],
)
# Start the web UI only when this file is executed as a script. An additional
# unguarded launch() call would also run on import and would start the server
# a second time once the first call returns.
if __name__ == "__main__":
    demo.launch()
|