jzuluagav97
committed on
Upload 7 files
Browse files- README.md +55 -3
- config.json +45 -0
- gitattributes +35 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +7 -0
- tokenizer_config.json +15 -0
- vocab.txt +0 -0
README.md
CHANGED
@@ -1,3 +1,55 @@
|
|
1 |
-
---
|
2 |
-
license: cc-by-nc-4.0
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: cc-by-nc-4.0
|
3 |
+
pipeline_tag: text-classification
|
4 |
+
tags:
|
5 |
+
- BERT
|
6 |
+
- BETO
|
7 |
+
- spanish
|
8 |
+
- sentiment-analysis
|
9 |
+
- text-classification
|
10 |
+
- NLP
|
11 |
+
- transformers
|
12 |
+
widget:
|
13 |
+
- text: Me encanta usar este modelo para análisis de sentimiento.
|
14 |
+
example_title: Sentimiento Positivo
|
15 |
+
- text: Este producto no cumplió mis expectativas.
|
16 |
+
example_title: Sentimiento Negativo
|
17 |
+
- text: El clima está bastante agradable hoy.
|
18 |
+
example_title: Sentimiento Neutro
|
19 |
+
- text: Me siento devastado por las noticias recientes.
|
20 |
+
example_title: Sentimiento Negativo
|
21 |
+
- text: La película estuvo regular, no fue ni buena ni mala.
|
22 |
+
example_title: Sentimiento Neutro
|
23 |
+
language:
|
24 |
+
- es
|
25 |
+
library_name: transformers
|
26 |
+
---
|
27 |
+
|
28 |
+
|
29 |
+
# 🌐 BETO Spanish Sentiment Analysis Model 📝🤖
|
30 |
+
|
31 |
+
📌 **Summary in English**:
|
32 |
+
This sentiment analysis model is based on BETO, a Spanish variant of BERT.
|
33 |
+
|
34 |
+
### 🎯📊 Model Performance
|
35 |
+
* **Accuracy in 3 categories**: 67.59%
|
36 |
+
* **Classification Report**:
|
37 |
+
|
38 |
+
| Sentiment | Precision | Recall | F1-Score | Support |
|
39 |
+
|-----------|-----------|--------|----------|---------|
|
40 |
+
| Negative | 0.64 | 0.72 | 0.68 | 15844 |
|
41 |
+
| Neutral | 0.64 | 0.54 | 0.58 | 22721 |
|
42 |
+
| Positive | 0.73 | 0.79 | 0.76 | 22233 |
|
43 |
+
| **Weighted Avg** | **0.67** | **0.68** | **0.67** | **60798** |
|
44 |
+
|
45 |
+
### 📔🔗 [Try it on Google Colab!](https://colab.research.google.com/drive/1QQ5N1v5FLoXFXMeYHB8wVzvyxNT9X5oA?usp=sharing) 🌐
|
46 |
+
|
47 |
+
|
48 |
+
## Model and Data Sources
|
49 |
+
Cañete, J., Chaperon, G., Fuentes, R., Pérez, J., & Bustos, B. (2020). Spanish Pre-Trained BERT Model and Evaluation Data. Retrieved from https://arxiv.org/abs/2308.02976
|
50 |
+
|
51 |
+
SEPLN TASS (2012). [Workshop on Sentiment Analysis at SEPLN](http://tass.sepln.org/)
|
52 |
+
### License Disclaimer
|
53 |
+
|
54 |
+
The license CC BY-NC 4.0 best describes our intentions for our work. However, we are not sure that all the datasets used to train BETO have licenses compatible with CC BY-NC 4.0 (especially for commercial use). Please use at your own discretion and verify that the licenses of the original text resources match your needs.
|
55 |
+
|
config.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "NEGATIVE",
|
14 |
+
"1": "VERY NEGATIVE",
|
15 |
+
"2": "NEUTRAL",
|
16 |
+
"3": "UNDEFINED",
|
17 |
+
"4": "POSITIVE",
|
18 |
+
"5": "VERY POSITIVE"
|
19 |
+
},
|
20 |
+
"initializer_range": 0.02,
|
21 |
+
"intermediate_size": 3072,
|
22 |
+
"label2id": {
|
23 |
+
"NEGATIVE": 0,
|
24 |
+
"VERY NEGATIVE": 1,
|
25 |
+
"NEUTRAL": 2,
|
26 |
+
"UNDEFINED": 3,
|
27 |
+
"POSITIVE": 4,
|
28 |
+
"VERY POSITIVE": 5
|
29 |
+
},
|
30 |
+
|
31 |
+
"layer_norm_eps": 1e-12,
|
32 |
+
"max_position_embeddings": 512,
|
33 |
+
"model_type": "bert",
|
34 |
+
"num_attention_heads": 12,
|
35 |
+
"num_hidden_layers": 12,
|
36 |
+
"output_past": true,
|
37 |
+
"pad_token_id": 1,
|
38 |
+
"position_embedding_type": "absolute",
|
39 |
+
"problem_type": "single_label_classification",
|
40 |
+
"torch_dtype": "float32",
|
41 |
+
"transformers_version": "4.33.2",
|
42 |
+
"type_vocab_size": 2,
|
43 |
+
"use_cache": true,
|
44 |
+
"vocab_size": 31002
|
45 |
+
}
|
gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68141662ab81efa45243f4d29e005f995f6ae7d53448e57c3f5ba83d2c998b91
|
3 |
+
size 439490353
|
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
tokenizer_config.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"clean_up_tokenization_spaces": true,
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"do_basic_tokenize": true,
|
5 |
+
"do_lower_case": false,
|
6 |
+
"mask_token": "[MASK]",
|
7 |
+
"model_max_length": 512,
|
8 |
+
"never_split": null,
|
9 |
+
"pad_token": "[PAD]",
|
10 |
+
"sep_token": "[SEP]",
|
11 |
+
"strip_accents": false,
|
12 |
+
"tokenize_chinese_chars": true,
|
13 |
+
"tokenizer_class": "BertTokenizer",
|
14 |
+
"unk_token": "[UNK]"
|
15 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|