%%capture
!pip install transformers wandb torchmetrics lightning

Requirement already satisfied: transformers in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (4.27.4)
Requirement already satisfied: wandb in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (0.14.2)
Collecting torchmetrics
  Using cached torchmetrics-0.11.4-py3-none-any.whl (519 kB)
Collecting lightning
  Downloading lightning-2.0.3-py3-none-any.whl (1.8 MB)
     ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
     - -------------------------------------- 0.1/1.8 MB 2.6 MB/s eta 0:00:01
     ----- ---------------------------------- 0.2/1.8 MB 2.9 MB/s eta 0:00:01
     ----------- ---------------------------- 0.5/1.8 MB 4.6 MB/s eta 0:00:01
     ------------------------ --------------- 1.1/1.8 MB 6.5 MB/s eta 0:00:01
     ---------------------------------------- 1.8/1.8 MB 8.4 MB/s eta 0:00:00
Requirement already satisfied: tqdm>=4.27 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from transformers) (4.65.0)
Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from transformers) (0.13.4)
Requirement already satisfied: numpy>=1.17 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from transformers) (1.24.2)
Requirement already satisfied: regex!=2019.12.17 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from transformers) (2023.3.23)
Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from transformers) (0.13.3)
Requirement already satisfied: filelock in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from transformers) (3.11.0)
Requirement already satisfied: requests in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from transformers) (2.28.2)
Requirement already satisfied: packaging>=20.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from transformers) (23.0)
Requirement already satisfied: pyyaml>=5.1 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from transformers) (6.0)
Requirement already satisfied: GitPython!=3.1.29,>=1.0.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (3.1.31)
Requirement already satisfied: pathtools in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (0.1.2)
Requirement already satisfied: appdirs>=1.4.3 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (1.4.4)
Requirement already satisfied: docker-pycreds>=0.4.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (0.4.0)
Requirement already satisfied: Click!=8.0.0,>=7.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (8.1.3)
Requirement already satisfied: setproctitle in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (1.3.2)
Requirement already satisfied: protobuf!=4.21.0,<5,>=3.19.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (4.22.3)
Requirement already satisfied: sentry-sdk>=1.0.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (1.19.1)
Requirement already satisfied: psutil>=5.0.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (5.9.4)
Requirement already satisfied: setuptools in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from wandb) (67.6.1)
Requirement already satisfied: torch>=1.8.1 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from torchmetrics) (1.13.1)
Collecting lightning-utilities<2.0,>=0.7.0
  Using cached lightning_utilities-0.8.0-py3-none-any.whl (20 kB)
Collecting arrow<3.0,>=1.2.0
  Using cached arrow-1.2.3-py3-none-any.whl (66 kB)
Collecting fastapi<0.89.0,>=0.69.0
  Using cached fastapi-0.88.0-py3-none-any.whl (55 kB)
Collecting pytorch-lightning
  Downloading pytorch_lightning-2.0.3-py3-none-any.whl (720 kB)
     ---------------------------------------- 0.0/720.6 kB ? eta -:--:--
     ------------------------------------- 720.6/720.6 kB 44.4 MB/s eta 0:00:00
Requirement already satisfied: traitlets<7.0,>=5.3.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from lightning) (5.9.0)
Requirement already satisfied: Jinja2<5.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from lightning) (3.1.2)
Collecting starsessions<2.0,>=1.2.1
  Using cached starsessions-1.3.0-py3-none-any.whl (10 kB)
Collecting deepdiff<8.0,>=5.7.0
  Using cached deepdiff-6.3.0-py3-none-any.whl (69 kB)
Collecting croniter<1.4.0,>=1.3.0
  Using cached croniter-1.3.15-py2.py3-none-any.whl (19 kB)
Requirement already satisfied: urllib3<3.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from lightning) (1.26.15)
Collecting python-multipart<2.0,>=0.0.5
  Using cached python_multipart-0.0.6-py3-none-any.whl (45 kB)
Requirement already satisfied: typing-extensions<6.0,>=4.0.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from lightning) (4.5.0)
Requirement already satisfied: fsspec<2024.0,>=2022.5.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from lightning) (2023.4.0)
Collecting starlette
  Downloading starlette-0.28.0-py3-none-any.whl (68 kB)
     ---------------------------------------- 0.0/68.9 kB ? eta -:--:--
     ---------------------------------------- 68.9/68.9 kB 3.7 MB/s eta 0:00:00
Collecting lightning-cloud>=0.5.34
  Using cached lightning_cloud-0.5.36-py3-none-any.whl (562 kB)
Collecting websockets<12.0
  Using cached websockets-11.0.3-cp310-cp310-win_amd64.whl (124 kB)
Collecting dateutils<2.0
  Using cached dateutils-0.6.12-py2.py3-none-any.whl (5.7 kB)
Requirement already satisfied: pydantic<4.0,>=1.7.4 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from lightning) (1.10.7)
Collecting beautifulsoup4<6.0,>=4.8.0
  Using cached beautifulsoup4-4.12.2-py3-none-any.whl (142 kB)
Collecting rich<15.0,>=12.3.0
  Using cached rich-13.4.1-py3-none-any.whl (239 kB)
Collecting inquirer<5.0,>=2.10.0
  Using cached inquirer-3.1.3-py3-none-any.whl (18 kB)
Collecting uvicorn<2.0
  Using cached uvicorn-0.22.0-py3-none-any.whl (58 kB)
Collecting websocket-client<3.0
  Using cached websocket_client-1.5.2-py3-none-any.whl (56 kB)
Requirement already satisfied: python-dateutil>=2.7.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from arrow<3.0,>=1.2.0->lightning) (2.8.2)
Collecting soupsieve>1.2
  Using cached soupsieve-2.4.1-py3-none-any.whl (36 kB)
Requirement already satisfied: colorama in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from Click!=8.0.0,>=7.0->wandb) (0.4.6)
Requirement already satisfied: pytz in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from dateutils<2.0->lightning) (2023.3)
Collecting ordered-set<4.2.0,>=4.0.2
  Using cached ordered_set-4.1.0-py3-none-any.whl (7.6 kB)
Requirement already satisfied: six>=1.4.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from docker-pycreds>=0.4.0->wandb) (1.16.0)
Collecting starlette
  Using cached starlette-0.22.0-py3-none-any.whl (64 kB)
Collecting anyio<5,>=3.4.0
  Using cached anyio-3.7.0-py3-none-any.whl (80 kB)
Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from fsspec<2024.0,>=2022.5.0->lightning) (3.8.4)
Requirement already satisfied: gitdb<5,>=4.0.1 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from GitPython!=3.1.29,>=1.0.0->wandb) (4.0.10)
Collecting readchar>=3.0.6
  Using cached readchar-4.0.5-py3-none-any.whl (8.5 kB)
Collecting python-editor>=1.0.4
  Using cached python_editor-1.0.4-py3-none-any.whl (4.9 kB)
Collecting blessed>=1.19.0
  Using cached blessed-1.20.0-py2.py3-none-any.whl (58 kB)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from Jinja2<5.0->lightning) (2.1.2)
Collecting pyjwt
  Using cached PyJWT-2.7.0-py3-none-any.whl (22 kB)
Requirement already satisfied: idna<4,>=2.5 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from requests->transformers) (3.4)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from requests->transformers) (3.1.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from requests->transformers) (2022.12.7)
Collecting markdown-it-py<3.0.0,>=2.2.0
  Using cached markdown_it_py-2.2.0-py3-none-any.whl (84 kB)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from rich<15.0,>=12.3.0->lightning) (2.15.0)
Collecting itsdangerous<3.0.0,>=2.0.1
  Using cached itsdangerous-2.1.2-py3-none-any.whl (15 kB)
Collecting h11>=0.8
  Using cached h11-0.14.0-py3-none-any.whl (58 kB)
Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec<2024.0,>=2022.5.0->lightning) (4.0.2)
Requirement already satisfied: aiosignal>=1.1.2 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec<2024.0,>=2022.5.0->lightning) (1.3.1)
Requirement already satisfied: yarl<2.0,>=1.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec<2024.0,>=2022.5.0->lightning) (1.8.2)
Requirement already satisfied: frozenlist>=1.1.1 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec<2024.0,>=2022.5.0->lightning) (1.3.3)
Requirement already satisfied: multidict<7.0,>=4.5 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec<2024.0,>=2022.5.0->lightning) (6.0.4)
Requirement already satisfied: attrs>=17.3.0 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec<2024.0,>=2022.5.0->lightning) (22.2.0)
Collecting sniffio>=1.1
  Using cached sniffio-1.3.0-py3-none-any.whl (10 kB)
Collecting exceptiongroup
  Using cached exceptiongroup-1.1.1-py3-none-any.whl (14 kB)
Collecting jinxed>=1.1.0
  Using cached jinxed-1.2.0-py2.py3-none-any.whl (33 kB)
Requirement already satisfied: wcwidth>=0.1.4 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from blessed>=1.19.0->inquirer<5.0,>=2.10.0->lightning) (0.2.6)
Requirement already satisfied: smmap<6,>=3.0.1 in c:\users\froro\onedrive\escritorio\unal\rna\financia\env\lib\site-packages (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb) (5.0.0)
Collecting mdurl~=0.1
  Using cached mdurl-0.1.2-py3-none-any.whl (10.0 kB)
Collecting ansicon
  Using cached ansicon-1.89.0-py2.py3-none-any.whl (63 kB)
Installing collected packages: python-editor, ansicon, websockets, websocket-client, soupsieve, sniffio, readchar, python-multipart, pyjwt, ordered-set, mdurl, lightning-utilities, jinxed, itsdangerous, h11, exceptiongroup, uvicorn, torchmetrics, markdown-it-py, deepdiff, dateutils, croniter, blessed, beautifulsoup4, arrow, anyio, starlette, rich, inquirer, starsessions, pytorch-lightning, fastapi, lightning-cloud, lightning
Successfully installed ansicon-1.89.0 anyio-3.7.0 arrow-1.2.3 beautifulsoup4-4.12.2 blessed-1.20.0 croniter-1.3.15 dateutils-0.6.12 deepdiff-6.3.0 exceptiongroup-1.1.1 fastapi-0.88.0 h11-0.14.0 inquirer-3.1.3 itsdangerous-2.1.2 jinxed-1.2.0 lightning-2.0.3 lightning-cloud-0.5.36 lightning-utilities-0.8.0 markdown-it-py-2.2.0 mdurl-0.1.2 ordered-set-4.1.0 pyjwt-2.7.0 python-editor-1.0.4 python-multipart-0.0.6 pytorch-lightning-2.0.3 readchar-4.0.5 rich-13.4.1 sniffio-1.3.0 soupsieve-2.4.1 starlette-0.22.0 starsessions-1.3.0 torchmetrics-0.11.4 uvicorn-0.22.0 websocket-client-1.5.2 websockets-11.0.3

[notice] A new release of pip is available: 23.0.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import wandb
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import set_seed
from torch import nn

def SEED(seed):
    """Seed the random number generators used in this notebook.

    Calls ``torch.manual_seed``, ``np.random.seed`` and the ``transformers``
    helper ``set_seed`` (in that order) with the same value.

    Args:
        seed: Seed value forwarded unchanged to each seeding function.
    """
    for apply_seed in (torch.manual_seed, np.random.seed, set_seed):
        apply_seed(seed)


SEED(42)


# Configuration by model
# Number of sentiment variables predicted per text
# (target / companies / consumers, see the label column list further down).
NUM_VARIABLES = 3
# Backward-compatible alias: the misspelled name is still referenced elsewhere.
NUM_VARAIBLES = NUM_VARIABLES
# Number of classes per variable (negative / neutral / positive).
NUM_LABELS = 3
# Total number of output logits: one per (variable, class) pair.
num_labels = NUM_LABELS * NUM_VARIABLES
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Backward-compatible alias for the original misspelled name.
divice = device


from transformers import ( 
    AutoModelForSequenceClassification,
    AutoTokenizer,
    get_constant_schedule_with_warmup,
)

# Configuring the model
# The checkpoint ships a 3-way classification head; asking for `num_labels`
# outputs with ignore_mismatched_sizes=True makes transformers re-initialise
# the head with the new size instead of raising on the shape mismatch.
model_name = "pysentimiento/robertuito-sentiment-analysis"
num_labels = NUM_VARAIBLES * NUM_LABELS
auto_tokenizer = AutoTokenizer.from_pretrained(model_name)
model_hugginface = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    ignore_mismatched_sizes=True,
    num_labels=num_labels,
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/384 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/167 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/925 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/435M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at pysentimiento/robertuito-sentiment-analysis and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([9, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([9]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


class FinanciaSentimental(Dataset):
    """Map-style dataset that tokenizes texts and returns multi-label targets.

    Each item is built from one row of ``dataframe``: the ``text`` column is
    tokenized and padded/truncated to ``max_len`` tokens, and the values of
    the ``columns`` columns are returned as a float label vector.

    Args:
        tokenizer: Object exposing ``encode_plus`` (e.g. a Hugging Face
            tokenizer) that returns ``input_ids`` and ``attention_mask``
            tensors when called with ``return_tensors='pt'``.
        dataframe: pandas DataFrame with a ``text`` column plus the label
            columns.
        columns: List of label column names.
        max_len: Fixed token length of every encoded example. The previous
            implementation ignored this argument and always used 130, so 130
            is now the default to keep existing callers unchanged.
    """

    def __init__(self, tokenizer, dataframe, columns, max_len=130):
        self.tokenizer = tokenizer
        self.dataframe = dataframe
        # Columns to target
        self._columns = columns
        self.max_len = max_len

    @property
    def columns(self):
        """Return the names of the label columns."""
        return self._columns

    def __len__(self):
        """Return the number of rows in the underlying dataframe."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return the encoded text and label vector for the row at ``index``.

        Returns:
            dict with ``input_ids``, ``attention_mask`` and ``token_type_ids``
            (long tensors of length ``max_len``) and ``labels`` (float tensor
            with one entry per label column).
        """
        row = self.dataframe.iloc[index]
        # `padding='max_length'` already pads to max_length; the deprecated
        # `pad_to_max_length` flag was redundant and has been dropped.
        encoded = self.tokenizer.encode_plus(
            row['text'],
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

        # return_tensors='pt' adds a leading batch axis of size 1; remove only
        # that axis so a length-1 sequence is not collapsed to a scalar.
        input_ids = encoded["input_ids"].squeeze(0).to(dtype=torch.long)
        attention_mask = encoded["attention_mask"].squeeze(0).to(dtype=torch.long)
        if "token_type_ids" in encoded:
            token_type_ids = encoded["token_type_ids"].squeeze(0).to(dtype=torch.long)
        else:
            # Some tokenizers do not emit segment ids; use all zeros
            # (single segment) so the returned dict keeps the same keys.
            token_type_ids = torch.zeros_like(input_ids)

        label = row[self._columns].values.astype(np.float32)

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "token_type_ids": token_type_ids,
            "labels": torch.tensor(label, dtype=torch.float),
        }


import torch
import lightning.pytorch as pl
from tqdm import tqdm
from sklearn.metrics import f1_score, accuracy_score
from torch.nn import BCEWithLogitsLoss

class FinanciaMultilabel(pl.LightningModule):
    """Lightning module that fine-tunes a sequence classifier for multi-label output.

    The wrapped ``model`` must return an object with a ``.logits`` attribute.
    Logits are reshaped to ``(-1, num_labels)`` and trained with
    ``BCEWithLogitsLoss`` against the float label vectors found in each batch.

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and
            ``token_type_ids`` keyword arguments.
        num_labels: Number of output logits / label columns.
    """

    def __init__(self, model, num_labels):
        super().__init__()
        self.model = model
        self.num_labels = num_labels
        self.loss = BCEWithLogitsLoss()
        # Per-batch validation results, accumulated until the epoch ends.
        self.validation_step_outputs = []

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return the raw logits of the wrapped model."""
        return self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        ).logits

    def _shared_step(self, batch):
        """Run a forward pass on ``batch`` and return ``(loss, logits, labels)``."""
        labels = batch["labels"]
        logits = self(batch["input_ids"], batch["attention_mask"], batch["token_type_ids"])
        loss = self.loss(
            logits.view(-1, self.num_labels),
            labels.type_as(logits).view(-1, self.num_labels),
        )
        return loss, logits, labels

    def training_step(self, batch, batch_idx):
        """Compute, log (``train_loss``) and return the training loss."""
        loss, _, _ = self._shared_step(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Store loss, sigmoid probabilities and labels for the epoch-end metrics."""
        loss, logits, labels = self._shared_step(batch)
        info = {'val_loss': loss, 'pred_labels': torch.sigmoid(logits), 'labels': labels}
        self.validation_step_outputs.append(info)
        return

    def on_validation_epoch_end(self):
        """Aggregate the stored batches and log loss, micro-F1 and accuracy (in %)."""
        outputs = self.validation_step_outputs
        if not outputs:
            # No validation batches were collected; torch.stack([]) would raise.
            return
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        pred_labels = torch.cat([x['pred_labels'] for x in outputs])
        labels = torch.cat([x['labels'] for x in outputs])
        threshold = 0.50
        pred_bools = (pred_labels > threshold).cpu()
        true_bools = (labels == 1).cpu()
        val_f1_accuracy = f1_score(true_bools, pred_bools, average='micro') * 100
        # accuracy_score on multilabel indicator input counts a sample as
        # correct only when every label matches (per scikit-learn docs).
        val_flat_accuracy = accuracy_score(true_bools, pred_bools) * 100
        self.log('val_loss', avg_loss)
        self.log('val_f1_accuracy', val_f1_accuracy, prog_bar=True)
        self.log('val_flat_accuracy', val_flat_accuracy, prog_bar=True)
        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        """Return AdamW plus a plateau scheduler driven by ``val_loss``."""
        optimizer = torch.optim.AdamW(self.parameters(), lr=2e-5)
        # The monitored quantity is a loss, so the scheduler must run in 'min'
        # mode: with 'max' the learning rate was cut whenever val_loss stopped
        # *increasing*, i.e. exactly while the model was improving.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.1, patience=2, verbose=True, min_lr=1e-6
        )
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss',
            },
        }


# Path of the CSV file (despite its name, `train_df` holds a path, not a DataFrame).
train_df = "/content/drive/Shareddrives/Redes neuronales/Datasets/df_with_sentiment.csv"
df = pd.read_csv(train_df)
df = df[["id", "text", "target", "target_sentiment", "companies_sentiment", "consumers_sentiment", "tag"]]
# One-hot encode the three sentiment columns into `<column>_<value>` indicator columns.
df = pd.get_dummies(df, columns=["target_sentiment", "companies_sentiment", "consumers_sentiment"])
df_train = df[df.tag == "train"]
df_test = df[df.tag == "test"]
# Split the rows tagged "test" in half: one half for validation, one for testing.
# A fixed random_state keeps the split identical across runs regardless of
# the global RNG state or the order in which notebook cells were executed.
df_valid, df_test = train_test_split(df_test, test_size=0.5, random_state=42)


# Label columns, grouped by variable (target, companies, consumers) and,
# within each variable, ordered negative -> neutral -> positive.
columns_varaibles = [
    prefix + "_sentiment_" + polarity
    for prefix in ("target", "companies", "consumers")
    for polarity in ("negative", "neutral", "positive")
]


# Wrap each split in the tokenizing dataset; all splits share the label columns.
train_dataset = FinanciaSentimental(auto_tokenizer, df_train, columns_varaibles)
valid_dataset = FinanciaSentimental(auto_tokenizer, df_valid, columns_varaibles)
test_dataset = FinanciaSentimental(auto_tokenizer, df_test, columns_varaibles)


# Only the training data is shuffled. Evaluation loaders keep a fixed order so
# that results are reproducible (Lightning warns when a val loader shuffles).
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=16, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)


# Import the logger from the same `lightning.pytorch` namespace as the Trainer
# and callbacks used in this notebook; mixing it with the standalone
# `pytorch_lightning` package combines classes from two separate packages.
from lightning.pytorch.loggers import WandbLogger
wandb_logger = WandbLogger(project='FinancIA', name='#1', save_code=True, log_model=False, sync_tensorboard=True, save_dir="./logs")

wandb: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
wandb: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········

wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc
wandb: WARNING Path ./logs/wandb/ wasn't writable, using system temp directory.


from  lightning.pytorch import Trainer
from  lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from  lightning.pytorch.callbacks.progress import TQDMProgressBar
from  lightning.pytorch.loggers import WandbLogger
from torch.utils.data import DataLoader, WeightedRandomSampler

# val_loss is a quantity to minimise: mode="min" keeps the checkpoint with the
# LOWEST validation loss (mode="max" kept the worst one).
checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min", save_last=True, save_weights_only=True)
tqdm_callback = TQDMProgressBar(refresh_rate=1)
trainer = pl.Trainer(
    accelerator="cuda",
    max_epochs=10,
    logger=wandb_logger,
    callbacks=[checkpoint_callback, tqdm_callback],
    # "16-mixed" is the spelling Lightning 2.x asks for; plain 16 is only
    # accepted for historical reasons and triggers a warning.
    precision="16-mixed",
)
# Use the shared num_labels constant rather than a hard-coded 9.
model = FinanciaMultilabel(model_hugginface, num_labels)
wandb_logger.watch(model, log="all")
trainer.fit(model, train_dataloader, valid_dataloader)

/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:555: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name  | Type                             | Params
-----------------------------------------------------------
0 | model | RobertaForSequenceClassification | 108 M 
1 | loss  | BCEWithLogitsLoss                | 0     
-----------------------------------------------------------
108 M     Trainable params
0         Non-trainable params
108 M     Total params
435.183   Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name  | Type                             | Params
-----------------------------------------------------------
0 | model | RobertaForSequenceClassification | 108 M 
1 | loss  | BCEWithLogitsLoss                | 0     
-----------------------------------------------------------
108 M     Trainable params
0         Non-trainable params
108 M     Total params
435.183   Total estimated model params size (MB)

Sanity Checking: 0it [00:00, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:480: PossibleUserWarning: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
  rank_zero_warn(
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:280: PossibleUserWarning: The number of training batches (46) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
  rank_zero_warn(

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]


!zip -r logs.zip logs

  adding: logs/ (stored 0%)
  adding: logs/wandb/ (stored 0%)
  adding: logs/FinancIA/ (stored 0%)
  adding: logs/FinancIA/1y2tfqnr/ (stored 0%)
  adding: logs/FinancIA/1y2tfqnr/checkpoints/ (stored 0%)
  adding: logs/FinancIA/1y2tfqnr/checkpoints/last.ckpt (deflated 7%)
  adding: logs/FinancIA/1y2tfqnr/checkpoints/epoch=7-step=368.ckpt (deflated 7%)


# Fall back to the CPU when no GPU is present so the checkpoint can still be loaded.
device = "cuda" if torch.cuda.is_available() else "cpu"
# `model` and `num_labels` are the constructor arguments of FinanciaMultilabel;
# they are passed explicitly here alongside the checkpoint path.
model = FinanciaMultilabel.load_from_checkpoint(
    "/content/logs/FinancIA/1y2tfqnr/checkpoints/epoch=7-step=368.ckpt",
    model=model_hugginface,
    num_labels=num_labels,
    map_location=device,
)


# Human-readable name of each output position, in the same order as the
# label columns: three variables, each with negative / neutral / positive.
RETURN_VALUES = [
    f"{entity}_sentiment_{polarity}"
    for entity in ("target", "companies", "consumers")
    for polarity in ("negative", "neutral", "positive")
]


from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
model.eval()
# Use the GPU when available; make sure the model lives on the same device as
# the batches that are moved below.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
# Step 3: run the test batches through the model and collect the predictions
all_preds = []
labels = []
with torch.no_grad():
    for batch in test_dataloader:
        b_input_ids = batch["input_ids"].to(device)
        b_input_mask = batch["attention_mask"].to(device)
        # Feed the same token_type_ids that were used during training instead
        # of reading them from the batch and then discarding them.
        b_token_type_ids = batch["token_type_ids"].to(device)
        outputs = model(b_input_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids)
        # Per-label probabilities; labels never leave the CPU.
        all_preds.append(torch.sigmoid(outputs).detach().cpu().numpy())
        labels.append(batch["labels"].numpy())

# Step 4: post-process the predictions (probabilities -> 0/1 with a 0.5 threshold)
all_preds = np.concatenate(all_preds)
binary_preds = (all_preds > 0.5).astype(int)

# Step 5: gather the ground-truth labels used to evaluate the model
test_labels = np.concatenate(labels)


from sklearn.metrics import classification_report, multilabel_confusion_matrix
# Per-label precision / recall / F1 on the test split.
print(
    classification_report(
        test_labels,
        binary_preds,
        target_names=RETURN_VALUES,
    )
)

                              precision    recall  f1-score   support

   target_sentiment_negative       0.60      0.96      0.74        27
    target_sentiment_neutral       0.50      0.50      0.50         4
   target_sentiment_positive       0.91      0.59      0.72        54
companies_sentiment_negative       0.56      0.54      0.55        28
 companies_sentiment_neutral       0.57      0.50      0.53        40
companies_sentiment_positive       0.71      0.29      0.42        17
consumers_sentiment_negative       0.59      0.59      0.59        17
 consumers_sentiment_neutral       0.74      0.81      0.77        48
consumers_sentiment_positive       0.61      0.55      0.58        20

                   micro avg       0.67      0.63      0.65       255
                   macro avg       0.64      0.59      0.60       255
                weighted avg       0.69      0.63      0.64       255
                 samples avg       0.68      0.63      0.65       255


import seaborn as sns
import matplotlib.pyplot as plt

# One 2x2 confusion matrix per label, in the same order as RETURN_VALUES.
values = multilabel_confusion_matrix(test_labels, binary_preds)

# Draw one annotated heatmap per label instead of nine copy-pasted cells.
for label_name, matrix in zip(RETURN_VALUES, values):
    # fmt="d" prints the integer counts as-is; seaborn's default format
    # switches to scientific notation for counts of 100 or more.
    heatmap = sns.heatmap(matrix, annot=True, fmt="d")
    plt.title(label_name)

    # Show the heatmap
    plt.show()


import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# test_labels: ground-truth labels of the test split
# all_preds: per-label sigmoid probabilities predicted for the test split
# RETURN_VALUES: names of the classes
#
# ROC curves must be computed from continuous scores. Feeding the thresholded
# 0/1 predictions (binary_preds) collapses every curve to a single operating
# point, so the reported "AUC" would only reflect the 0.5 threshold.

# Compute the ROC curve and AUC for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
n_classes = len(RETURN_VALUES)
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], all_preds[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute the micro-averaged ROC curve (all label decisions pooled together)
fpr_micro, tpr_micro, _ = roc_curve(test_labels.ravel(), all_preds.ravel())
roc_auc_micro = auc(fpr_micro, tpr_micro)

# Compute the macro-averaged ROC curve: interpolate every per-class curve on
# the union of all FPR points and average the resulting TPR values
all_fpr = np.unique(np.concatenate(list(fpr.values())))
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= n_classes
fpr_macro = all_fpr
tpr_macro = mean_tpr
roc_auc_macro = auc(fpr_macro, tpr_macro)

# Plot the ROC curves
plt.figure(figsize=(10, 8))

# Plot the ROC curve of each class
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], label='Curva ROC {} (AUC = {:.2f})'.format(RETURN_VALUES[i], roc_auc[i]))

# Plot the micro-averaged ROC curve
plt.plot(fpr_micro, tpr_micro, label='Curva ROC micro (AUC = {:.2f})'.format(roc_auc_micro), color='deeppink', linestyle=':', linewidth=4)

# Plot the macro-averaged ROC curve
plt.plot(fpr_macro, tpr_macro, label='Curva ROC macro (AUC = {:.2f})'.format(roc_auc_macro), color='navy', linestyle=':', linewidth=4)

# Diagonal reference line (chance level)
plt.plot([0, 1], [0, 1], 'k--', linewidth=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curvas AUC-ROC para Clasificación Multietiqueta')
plt.legend(loc="lower right")
plt.show()

Entrenamiento e inferencia del modelo¶

Modelo pre-entrenado¶

Clase FinanciaSentimental¶

Clase de entrenamiento¶

Cargamos el dataset en pandas¶

Entrenamiento¶

Registro de las métricas¶

Entrenamiento¶

Inferencia del modelo¶

Módulo de testeo¶

Matrices de confusión¶

Curva ROC y AUC¶