|
import gradio as gr |
|
import torch |
|
from transformers import T5Tokenizer, AutoModelForCausalLM |
|
from utils import translate_from_jp_to_en |
|
|
|
# Hugging Face checkpoint identifier shared by the tokenizer and the model.
MODEL_NAME = "rinna/japanese-gpt-1b"

# Both objects are created once at import time and reused by every request.
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
|
|
|
|
|
def generate(text, max_length=128):
    """Sample a continuation of ``text`` and translate the result to English.

    Args:
        text: Prompt string. It is encoded without special tokens.
        max_length: Forwarded to ``model.generate`` as ``max_length``. It is
            coerced to ``int`` first because a UI slider may deliver a float.

    Returns:
        A ``(output, translation)`` tuple: ``output`` is the decoded first
        sequence returned by ``model.generate`` and ``translation`` is the
        value of ``translate_from_jp_to_en(output)``.
    """
    token_ids = tokenizer.encode(
        text, add_special_tokens=False, return_tensors="pt")

    # Inference only: no gradients are needed for sampling.
    with torch.no_grad():
        output_ids = model.generate(
            token_ids,
            max_length=int(max_length),
            do_sample=True,
            top_k=500,
            top_p=0.95,
            pad_token_id=tokenizer.pad_token_id,
            bos_token_id=tokenizer.bos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            # The keyword is ``bad_words_ids`` (plural "words"); the previous
            # spelling ``bad_word_ids`` is not a generate() option, so the
            # unknown token was never actually excluded from sampling.
            bad_words_ids=[[tokenizer.unk_token_id]],
        )

    # Only the first (and only requested) sequence is decoded.
    output = tokenizer.decode(output_ids.tolist()[0])
    return output, translate_from_jp_to_en(output)
|
|
|
|
|
# Heading text for the demo page.
title = "JP GPT Demo"
# Fixed the misspelling "Japanase" in the user-facing description.
description = "Demo for generating text in Japanese using a GPT model"
# One example row: a prompt string followed by a token-count value.
examples = [['日本のeスポーツ障害者がステレオタイプを撃ち落とす', 128]]
|
# Input widgets, listed in the positional order of generate(text, max_length).
prompt_box = gr.inputs.Textbox(lines=4, label="Prompt")
length_slider = gr.inputs.Slider(
    minimum=8, maximum=1024, step=8, default=64, label="Numbers of tokens")

# Two text outputs, matching the two values generate() returns.
demo = gr.Interface(
    fn=generate,
    inputs=[prompt_box, length_slider],
    outputs=["text", "text"],
    title=title,
    description=description,
    examples=examples,
)
demo.launch(enable_queue=True)
|
|