JuanJoseMV's picture
Adding special tokens
f397f4b
raw
history blame
2.95 kB
import gradio as gr
from NeuralTextGenerator import BertTextGenerator
# ---------------------------------------------------------------------------
# Model loading
# ---------------------------------------------------------------------------
# Checkpoint identifiers for the four generators offered by the demo.
BERT_model_name = "Twitter/twhin-bert-large"
RoBERTa_model_name = "cardiffnlp/twitter-xlm-roberta-base"
finetunned_BERT_model_name = "JuanJoseMV/BERT_text_gen"
finetunned_RoBERTa_model_name = "JuanJoseMV/XLM_RoBERTa_text_gen"

# Base BERT: model and tokenizer share the same identifier.
BERT = BertTextGenerator(
    BERT_model_name,
    tokenizer=BERT_model_name,
)

# Base RoBERTa: model and tokenizer share the same identifier.
RoBERTa = BertTextGenerator(
    RoBERTa_model_name,
    tokenizer=RoBERTa_model_name,
)

# Fine-tuned BERT: unlike the others, it is paired with the stock
# 'bert-base-uncased' tokenizer rather than a tokenizer named after the model.
finetunned_BERT = BertTextGenerator(
    finetunned_BERT_model_name,
    tokenizer='bert-base-uncased',
)

# Fine-tuned RoBERTa: tokenizer is loaded under the fine-tuned model's own name.
finetunned_RoBERTa = BertTextGenerator(
    finetunned_RoBERTa_model_name,
    tokenizer=finetunned_RoBERTa_model_name,
)
# Sentiment control tokens: three slots per sentiment, matching the
# "[<SENTIMENT>-0] [<SENTIMENT>-1] [<SENTIMENT>-2]" prefix that
# sentence_builder prepends to the seed text.
special_tokens = [
    '[POSITIVE-0]',
    '[POSITIVE-1]',
    '[POSITIVE-2]',
    '[NEGATIVE-0]',
    '[NEGATIVE-1]',
    '[NEGATIVE-2]',
]

# Register the tokens with each tokenizer, then resize that model's embedding
# matrix to the new tokenizer length.
# NOTE(review): finetunned_RoBERTa is not extended here; presumably its own
# tokenizer already contains these tokens -- confirm.
for _generator in (BERT, RoBERTa, finetunned_BERT):
    _generator.tokenizer.add_special_tokens(
        {'additional_special_tokens': special_tokens}
    )
    _generator.model.resize_token_embeddings(len(_generator.tokenizer))
del _generator
def sentence_builder(selected_model, n_sentences, max_iter, sentiment, seed_text):
    """Generate tweets with the selected model and format them as one string.

    Args:
        selected_model: Label of the generator to use. "Finetuned_RoBERTa",
            "Finetuned_BERT" and "RoBERTa" select the matching generator; the
            misspelled label "Finetunned_BERT" is accepted as an alias of
            "Finetuned_BERT". Any other value falls back to the base BERT
            generator.
        n_sentences: Number of tweets to generate.
        max_iter: Maximum number of iterations for the generation.
        sentiment: Sentiment tag (e.g. "POSITIVE" or "NEGATIVE") used to build
            the three control tokens prepended to the seed text.
        seed_text: Text the generation is seeded with.

    Returns:
        A string with one "- GENERATED TWEET #<i>: <sentence>" line per
        generated sentence, each terminated by a newline.
    """
    if selected_model == "Finetuned_RoBERTa":
        generator = finetunned_RoBERTa
    elif selected_model in ("Finetuned_BERT", "Finetunned_BERT"):
        # Both spellings are accepted so a label written with the doubled "n"
        # no longer falls through silently to the base BERT model.
        generator = finetunned_BERT
    elif selected_model == "RoBERTa":
        generator = RoBERTa
    else:
        generator = BERT

    parameters = {
        'n_sentences': n_sentences,
        'batch_size': 2,
        'avg_len': 30,
        'max_len': 50,
        'generation_method': 'parallel',
        'sample': True,
        'burnin': 450,
        'max_iter': max_iter,
        'top_k': 100,
        # Prefix the seed with the sentiment control tokens.
        'seed_text': f"[{sentiment}-0] [{sentiment}-1] [{sentiment}-2] {seed_text}",
        'verbose': True,
    }
    sents = generator.generate(**parameters)

    # One line per generated sentence, numbered from 0.
    return ''.join(
        f'- GENERATED TWEET #{i}: {s}\n' for i, s in enumerate(sents)
    )
# Gradio UI. The input components map positionally onto sentence_builder's
# parameters: model label, tweet count, iteration cap, sentiment, seed text.
demo = gr.Interface(
    sentence_builder,
    [
        # Choice labels must match the strings sentence_builder compares
        # against; "Finetuned_BERT" was previously misspelled "Finetunned_BERT",
        # which made that option fall back to the base BERT model.
        gr.Radio(
            ["BERT", "RoBERTa", "Finetuned_RoBERTa", "Finetuned_BERT"],
            value="BERT",
            label="Generator model",
        ),
        gr.Slider(
            1,
            15,
            value=2,
            label="Num. Tweets",
            step=1,
            info="Number of tweets to be generated.",
        ),
        gr.Slider(
            50,
            500,
            value=100,
            label="Max. iter",
            info="Maximum number of iterations for the generation.",
        ),
        gr.Radio(
            ["POSITIVE", "NEGATIVE"],
            value="POSITIVE",
            label="Sentiment to generate",
        ),
        gr.Textbox(
            '',
            label="Seed text",
            info="Seed text for the generation.",
        ),
    ],
    "text",
)

demo.launch()