|
import gradio as gr |
|
|
|
from transformers import pipeline |
|
|
|
# Heading passed to the Gradio interface as its `title`.
title = "Automatic Readability Assessment of Texts in Spanish"
|
|
|
# Short markdown blurb passed to the Gradio interface as its `description`.
description = """
Is a text **complex** or **simple**? Can it be understood by someone learning Spanish with a **basic**, **intermediate** or **advanced** knowledge of the language (*coming soon!*)? Find out with our models below!
"""
|
|
|
# Long-form markdown passed to the Gradio interface as its `article`:
# background on readability assessment, the project goal, a link to the
# training report, and the team.
article = """

### What's Readability Assessment?

[Automatic Readability Assessment](https://arxiv.org/abs/2105.00973) consists of determining "how difficult" it could be to read and understand a piece of text.
This could be estimated using readability formulas, such as [Flesch for English](https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests) or [similar ones for Spanish](https://www.siicsalud.com/imagenes/blancopet1.pdf).
However, their dependence on surface statistics (e.g. average sentence length) makes them unreliable.
As such, developing models that could estimate a text's readability by "looking beyond the surface" is a necessity.

### Goal

We aim to contribute to the development of **neural models for readability assessment for Spanish**, following previous work for [English](https://aclanthology.org/2021.cl-1.6/) and [Filipino](https://aclanthology.org/2021.ranlp-1.69/).


### More Information

Details about how we trained these models can be found in our [report](https://wandb.ai/readability-es/readability-es/reports/Texts-Readability-Analysis-for-Spanish--VmlldzoxNzU2MDUx).


### Team

- [Laura Vásquez-Rodríguez](https://lmvasque.github.io/)
- Pedro Cuenca
- Sergio Morales
- [Fernando Alva-Manchego](https://feralvam.github.io/)

"""
|
|
|
# Sample inputs passed to the Gradio interface as `examples`. Each row holds
# one value per input component: the Spanish text, then the radio choice.
examples = [
    ["Esta es una frase simple.", "simple or complex?"],
    [
        "La ciencia nos enseña, en efecto, a someter nuestra razón a la verdad y a conocer y juzgar las cosas como son, es decir, como ellas mismas eligen ser y no como quisiéramos que fueran.",
        "simple or complex?",
    ],
]
|
|
|
|
|
# Classification pipelines, created once at import time so that every call to
# the prediction function reuses the already-loaded models.
# `return_all_scores=True` asks for a score for every label, not just the best.
# NOTE(review): recent transformers releases deprecate `return_all_scores` in
# favour of `top_k=None`, which may change how the result is nested — confirm
# against the installed version before migrating.
model_binary = pipeline(
    "sentiment-analysis",
    model="hackathon-pln-es/readability-es-sentences",
    return_all_scores=True,
)
model_ternary = pipeline(
    "sentiment-analysis",
    model="hackathon-pln-es/readability-es-3class-sentences",
    return_all_scores=True,
)
|
|
|
def predict(text, levels):
    """Score *text* with the readability model selected by *levels*.

    Args:
        text: The text to classify.
        levels: Index of the selected "Readability Levels" option. ``0``
            selects the binary model; any other value selects the
            three-class model.

    Returns:
        A dict mapping each label reported by the model to its score.
    """
    # NOTE(review): a missing selection (presumably ``None``) also falls
    # through to the three-class model — confirm that this is intended.
    model = model_binary if levels == 0 else model_ternary

    # The pipeline result is indexed per input; only one text is passed in,
    # so the first entry holds the per-label score dicts for that text.
    predicted_scores = model(text)[0]

    return {entry["label"]: entry["score"] for entry in predicted_scores}
|
|
|
|
|
# Build the demo UI: a textbox plus a readability-level selector feed
# `predict`, whose label -> score dict is shown in a Label component.
# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces are deprecated in
# later Gradio releases — confirm the pinned Gradio version before upgrading.
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.inputs.Textbox(lines=7, placeholder="Write a text in Spanish.", label="Text in Spanish"),
        # type="index" makes the callback receive the position of the chosen
        # option rather than its text; `predict` compares that value to 0.
        gr.inputs.Radio(choices=["simple or complex?"], type="index", label="Readability Levels"),
    ],
    outputs=[
        gr.outputs.Label(num_top_classes=3, label="Predicted Readability Level"),
    ],
    theme="huggingface",
    title=title,
    description=description,
    article=article,
    examples=examples,
    allow_flagging="never",
)

# Start the web app as soon as the script is executed.
iface.launch()