from functools import lru_cache

import gradio as gr
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer
from transformers import TFBertModel, TFXLMRobertaModel
|
|
|
# Title passed to gr.Interface as the page heading.
app_title = "Portuguese Counter Hate Speech Detection (NFAA)"
|
|
|
# Markdown passed to gr.Interface as the description; blank lines separate
# the paragraphs when rendered.
app_description = """
This app is the culmination of the kNOwHATE consortium project, which aimed to tackle Online Hate Speech in the Portuguese community. It serves as a user-friendly interface to classify text and identify instances of Hate Speech.

This app leverages state-of-the-art Natural Language Processing models developed in the scope of this project to classify harmful text.

Select a model from the dropdown menu and input your text to see the classification results. Explore the examples of Hate Speech and Non-Hate Speech offered, and join us in fostering a safer and more respectful online community.

For more information about the kNOwHATE project and its initiatives, visit our website [here](https://knowhate.eu) and to explore and use these models visit our Hugging Face page [here](https://huggingface.co/knowhate).
"""
|
|
|
|
|
# Example rows passed to gr.Interface as `examples`. Each row holds one value
# per input component, in the order the inputs are declared:
# [context text, target text, model path].
app_examples = [
    [
        "Essa gente tem é de deixar de ser apaparicada pelo Estado e começar a cumprir os seus deveres como cidadãos",
        "Nepia o que faz com que as pessoas generalizem é o ódio intrínseco que têm contra uma etnia, ng é responsável pela sua xenofobia",
        "knowhate/twt-bertimbau/twt-bb-b16e5-avg767.keras",
    ],
    [
        "Nem vou comentar o hate e misoginia que tenho visto aqui no tt em relação à Anitta",
        "E xenofobia também. Tugas no seu melhor",
        "knowhate/twt-bertimbau/twt-bb-b16e5-avg767.keras",
    ],
    [
        "A Festa tá no Climax, chama o zuca pra Dançar.",
        "Já reparaste no contador da luz? Vai trabalhar malandro",
        "knowhate/twt-bertimbau/twt-bb-b16e5-avg767.keras",
    ],
]
|
|
|
# Model paths offered in the "Model" dropdown; the first entry is the default.
model_list = [
    "knowhate/twt-bertimbau/twt-bb-b16e5-avg767.keras",
]
|
|
|
# Tokenizer checkpoint used to encode every (context, target) pair.
_TOKENIZER_CHECKPOINT = "neuralmind/bert-base-portuguese-cased"

# Output labels, indexed by position in the model's score vector.
# NOTE(review): the index-to-label mapping is taken from the original code;
# confirm it against the label order used when the model was trained.
_LABELS = ("Neutral", "Counter Speech", "Hate Speech")


# Loading a Keras model is expensive, so keep the loaded models around instead
# of re-reading them on every request. The bound keeps memory use predictable.
@lru_cache(maxsize=4)
def _load_model(model_path):
    """Load the Keras classifier stored at ``model_path`` (cached per path)."""
    return tf.keras.models.load_model(
        model_path, custom_objects={"TFBertModel": TFBertModel}
    )


@lru_cache(maxsize=1)
def _load_tokenizer():
    """Load the tokenizer for ``_TOKENIZER_CHECKPOINT`` once and reuse it."""
    return AutoTokenizer.from_pretrained(_TOKENIZER_CHECKPOINT, use_fast=True)


def predict(text, target, chosen_model):
    """Classify a (context, target) text pair with the selected model.

    Args:
        text: First segment of the pair; the UI feeds it from the "Context"
            textbox.
        target: Second segment of the pair; the UI feeds it from the "Target"
            textbox.
        chosen_model: Path of the saved Keras model to run.

    Returns:
        A dict mapping each of ``'Neutral'``, ``'Counter Speech'`` and
        ``'Hate Speech'`` to its score as a plain Python float.

    Raises:
        IndexError: If the model yields fewer than three scores.
    """
    model = _load_model(chosen_model)
    tokenizer = _load_tokenizer()

    # Encode both texts as one sentence pair, padded/truncated to max length.
    encoded = tokenizer(
        text,
        target,
        truncation=True,
        padding="max_length",
        return_tensors="np",
    )

    # Only one pair is submitted, so take the first row of the model output.
    scores = np.asarray(model(encoded))[0]

    # Plain floats rather than NumPy scalars, so the result serializes cleanly.
    return {label: float(scores[index]) for index, label in enumerate(_LABELS)}
|
|
|
# Input components, in the positional order `predict` receives them:
# context text, target text, model path. The textboxes are pre-filled with
# the first example row and the dropdown defaults to the first model.
inputs = [
    gr.Textbox(label="Context", value=app_examples[0][0]),
    gr.Textbox(label="Target", value=app_examples[0][1]),
    gr.Dropdown(label="Model", choices=model_list, value=model_list[0]),
]
|
|
|
# Single output component: a label widget fed by the dict `predict` returns.
outputs = [
    gr.Label(label="Result"),
]
|
|
|
# Assemble the UI around `predict` and start serving it as soon as the
# script runs (the launch stays at module level, as in the original).
demo = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    examples=app_examples,
    theme=gr.themes.Base(primary_hue="red"),
)
demo.launch()