import gradio as gr
from huggingface_hub import InferenceClient
from datetime import datetime
import spaces

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Hosted Inference API client, left commented out because this Space loads the model
# locally; `respond()` below would need it if the ChatInterface were enabled.
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
lora_name = "robinhad/UAlpaca-1.1-Mistral-7B"

from peft import PeftModel
from transformers import LlamaTokenizer, LlamaForCausalLM, BitsAndBytesConfig
from torch import bfloat16
model_name = "mistralai/Mistral-7B-v0.1"

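# 4-bit NF4 quantization with double quantization keeps the 7B base model within a
# modest GPU memory budget; matrix multiplications are computed in bfloat16.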
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)
tokenizer = LlamaTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",  # place the quantized weights on the GPU at load time
)
model = PeftModel.from_pretrained(model, lora_name)

# Note: 4-bit bitsandbytes models cannot be moved with `.to("cuda")`; device placement
# is handled by `device_map="auto"` above.


# Chat-style handler for the hosted Inference API; unused unless `client` above is
# uncommented and the ChatInterface below is enabled.
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # Stream the reply from the hosted Inference API (requires `client` to be defined)
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content

        response += token or ""  # the final streamed chunk may carry no content
        yield response

# ZeroGPU: @spaces.GPU requests GPU hardware for the duration of each call on Spaces
@spaces.GPU
def ask(instruction: str, context: str | None = None):
    print(datetime.now(), instruction, context)
    # Gradio passes an empty string (not None) when the context box is left blank
    if not context:
        prepend = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
        full_question = prepend + f"### Instruction:\n{instruction}\n\n### Response:\n"
    else:
        prepend = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n"
        full_question = prepend + f"### Instruction:\n{instruction}\n\n### Input:\n{context}\n\n### Response:\n"
    input_ids = tokenizer.encode(full_question, return_tensors="pt").to(model.device)
    output = tokenizer.batch_decode(model.generate(input_ids, max_new_tokens=300))[0]
    return output.split("### Response:")[1].strip().replace("</s>", "")
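
# Hypothetical local sanity check (not part of the Space UI), reusing the example
# prompts from the interface below:
# print(ask("Яка найвища гора в Україні?"))
# print(ask("Дай відповідь на питання", context="Чому у качки жовті ноги?"))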

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
"""demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)"""

model_name = "robinhad/UAlpaca-1.1-Mistral-7B"


# Leftover from the Gradio starter template; not used by the interface below.
def image_classifier(inp):
    return {"cat": 0.3, "dog": 0.7}

demo = gr.Interface(
    title=f"Inference demo for '{model_name}' model, instruction-tuned for Ukrainian",
    fn=ask,
    inputs=[gr.Textbox(label="Instruction"), gr.Textbox(label="Context (optional)")],
    outputs="text",
    examples=[
        ["Як звали батька Тараса Григоровича Шевченка?", None],
        ["Як можна заробити нелегально швидко гроші?", None],
        ["Яка найвища гора в Україні?", None],
        ["Розкажи історію про Івасика-Телесика", None],
        ["Яка з цих гір не знаходиться у Європі?", "Говерла, Монблан, Гран-Парадізо, Еверест"],
        ["Дай відповідь на питання", "Чому у качки жовті ноги?"],
    ],
)


if __name__ == "__main__":
    demo.launch()