"""Gradio demo serving text completions from the Polyglot Korean 1.3B model."""

import os

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteriaList,
    pipeline,
)

# Page metadata. NOTE: theme, title, description and article are defined here
# but are not passed to gr.Interface below.
theme = "darkgrass"
title = "Polyglot(Korean) Demo"
model_name = "EleutherAI/polyglot-ko-1.3b"

# Strings whose token sequences the model is forbidden to generate.
bad_words = [
    '...',
    '....',
    '(중략)',
    'http',
]

description = "polyglot (1.3B 파라미터 사이즈) 한국어 모델을 시연하는 데모페이지 입니다."

# Written with explicit "\n" escapes: a plain double-quoted literal cannot
# span several physical lines.
article = (
    "\n"
    "\n"
    "Polyglot: Large Language Models of Well-balanced Competence in Multi-languages\n"
    "\n"
)

# Example prompts offered in the UI.
examples = [
    ["CPU와 GPU의 차이는,"],
    ["질문: 우크라이나 전쟁이 세계3차대전으로 확전이 될까요? \n답변:"],
    ["2040년 미국은, "],
]

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()

# Alternative generation path; predict() below calls model.generate directly
# and does not use this pipeline.
pipe = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    eos_token_id=tokenizer.eos_token_id,
)

# Tokenize the banned strings once at start-up instead of on every request.
# add_special_tokens=False keeps special tokens out of the banned sequences.
_bad_words_ids = [
    tokenizer.encode(bad_word, add_special_tokens=False)
    for bad_word in bad_words
]

# Use the EOS id for padding when the tokenizer defines no pad token.
_pad_token_id = (
    tokenizer.pad_token_id
    if tokenizer.pad_token_id is not None
    else tokenizer.eos_token_id
)


def predict(text: str) -> str:
    """Return *text* followed by a sampled continuation from the model.

    Args:
        text: Prompt typed into the demo's text box.

    Returns:
        The decoded first generated sequence (prompt plus up to 64 newly
        sampled tokens), or an empty string when *text* is empty.
    """
    # An empty prompt would yield a zero-length input tensor; skip generation.
    if not text:
        return ""

    encoded = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        # Generation stops early when the end-of-sequence token is produced.
        gen_tokens = model.generate(
            encoded.input_ids,
            # Passed explicitly so generate() does not have to infer the mask.
            attention_mask=encoded.attention_mask,
            do_sample=True,
            temperature=0.8,
            max_new_tokens=64,
            top_k=50,
            top_p=0.8,
            no_repeat_ngram_size=3,
            repetition_penalty=1.2,
            bad_words_ids=_bad_words_ids,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=_pad_token_id,
        )
    return tokenizer.batch_decode(gen_tokens)[0]


iface = gr.Interface(
    fn=predict,
    inputs='text',
    outputs='text',
    examples=examples,
)

iface.launch()