pfialho's picture
Update app.py
c767863 verified
raw
history blame
2.99 kB
import gradio as gr
from transformers import TFBertModel, TFXLMRobertaModel
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer
app_title = "Portuguese Counter Hate Speech Detection (NFAA)"
app_description = """
This app is the culmination of the kNOwHATE consortium project, which aimed to tackle Online Hate Speech in the Portuguese comunity. It serves as an user-friendly interface to classify text and identify instances of Hate Speech.
This app leverages state-of-the-art Natural Language Processing models developed in the scope of this project to classify harmful text.
Select a model from the dropdown menu and input your text to see the classification results. Explore the examples of Hate Speech and Non-Hate Speech offered, and join us in fostering a safer and more respectful online community.
For more information about the kNOwHATE project and its initiatives, visit our website [here](https://knowhate.eu) and to explore and use these models visit our Hugging Face page [here](https://huggingface.co/knowhate).
"""
# 1 0 2
app_examples = [
["Essa gente tem é de deixar de ser apaparicada pelo Estado e começar a cumprir os seus deveres como cidadãos",
"Nepia o que faz com que as pessoas generalizem é o ódio intrínseco que têm contra uma etnia, ng é responsável pela sua xenofobia",
"knowhate/twt-bertimbau/twt-bb-b16e5-avg767.keras"],
["Nem vou comentar o hate e misoginia que tenho visto aqui no tt em relação à Anitta",
"E xenofobia também. Tugas no seu melhor",
"knowhate/twt-bertimbau/twt-bb-b16e5-avg767.keras"],
["A Festa tá no Climax, chama o zuca pra Dançar.",
"Já reparaste no contador da luz? Vai trabalhar malandro",
"knowhate/twt-bertimbau/twt-bb-b16e5-avg767.keras"]
]
model_list = [
"knowhate/twt-bertimbau/twt-bb-b16e5-avg767.keras"
]
def predict(text, target, chosen_model):
model1 = tf.keras.models.load_model(chosen_model, custom_objects={"TFBertModel": TFBertModel})
checkpoint = "neuralmind/bert-base-portuguese-cased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True)
tokpair = tokenizer(text, target, truncation=True, padding='max_length', return_tensors='np')
outp = model1(tokpair)
proto_tensor = tf.make_tensor_proto(outp)
allscores = tf.make_ndarray(proto_tensor)[0]
scores_dict = {
'Neutral': allscores[0],
'Counter Speech': allscores[1],
'Hate Speech': allscores[2]
}
return scores_dict
inputs = [
gr.Textbox(label="Context", value= app_examples[0][0]),
gr.Textbox(label="Target", value= app_examples[0][1]),
gr.Dropdown(label="Model", choices=model_list, value=model_list[0])
]
outputs = [
gr.Label(label="Result"),
]
gr.Interface(fn=predict, inputs=inputs, outputs=outputs, title=app_title,
description=app_description, examples=app_examples, theme=gr.themes.Base(primary_hue="red")).launch()